LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
factorial.c 文件参考
+ factorial.c 的引用(Include)关系图:

浏览源代码.

宏定义

#define MUL(dst, ap, an, bp, bn)
 
#define mul_1(dst, rn, value)
 

函数

static void count_factors (fac_ptr fac, uint nfactors, uint n, uint p)
 
mp_size_t lmmp_factorial_ (mp_ptr restrict dst, mp_bitcnt_t bits, mp_size_t rn, uint n)
 
mp_size_t lmmp_factorial_size_ (uint n, mp_bitcnt_t *restrict bits)
 
mp_size_t lmmp_factors_mul_ (mp_ptr restrict dst, mp_size_t rn, fac_ptr restrict fac, uint nfactors)
 
mp_size_t lmmp_odd_factorial_uint_ (mp_ptr restrict dst, mp_size_t rn, uint n)
 

宏定义说明

◆ MUL

#define MUL (   dst,
  ap,
  an,
  bp,
  bn 
)
值:
if (an >= bn) \
lmmp_mul_(dst, ap, an, bp, bn); \
else \
lmmp_mul_(dst, bp, bn, ap, an)
#define an
#define bn

在文件 factorial.c15 行定义.

26 {
27 double ln_fact = lgamma(n + 1.0);
28 double log2_fact = ln_fact / LOG2_;
29 mp_size_t rn = ceil(log2_fact / LIMB_BITS) + 2; /* more two limbs */
30 *bits = n - lmmp_limb_popcnt_(n);
31 return rn;
32}
33
34/*
35 N N/2 N
36 +--+ / +--+ \ 2 / +--+ \
37 | | P_i ^ (e_i) = | | | P_i ^ (e_i / 2) | * | | | P_i ^ ( e_i mod 2) |
38 | | \ | | / \ | | /
39 i=0 i=0 i=0
40*/
41
42mp_size_t lmmp_factors_mul_(mp_ptr restrict dst, mp_size_t rn, fac_ptr restrict fac, uint nfactors) {
43 lmmp_param_assert(dst != NULL && fac != NULL);
44 lmmp_param_assert(rn > 0 && nfactors > 0);
45 if (nfactors <= 20) {
46 // 绝大多数情况下,大的质因数的指数都很小,所以这里只需要考虑小的质因数。
47 // 且随着递归的进行,进入这一步的质因数通常都很小。几乎不可能触发debug_assert
48 dst[0] = 1;
49 rn = 1;
50 for (uint i = 0; i < nfactors; i++) {
51 ulong f = fac[i].f;
52 uint j = fac[i].j;
53 lmmp_debug_assert(j != 0);
54 switch (j) {
55 case 1:
56 mul_1(dst, rn, f);
57 break;
58 case 2:
60 mul_1(dst, rn, f * f);
61 break;
62 case 3:
63 lmmp_debug_assert(f <= 2642245); // 2642245 = floor(B^(1/3))
64 mul_1(dst, rn, f * f * f);
65 break;
66 case 4:
68 mul_1(dst, rn, f * f * f * f);
69 break;
70 default:
72 ulong p2 = f * f;
73 ulong p4 = p2 * p2;
74 for (uint i = 0; i < j - 3; i += 4) {
75 mul_1(dst, rn, p4);
76 }
77 switch (j % 4) {
78 case 1:
79 mul_1(dst, rn, f);
80 break;
81 case 2:
82 mul_1(dst, rn, p2);
83 break;
84 case 3:
85 mul_1(dst, rn, p2 * f);
86 break;
87 default:
88 break;
89 }
90 break;
91 }
92 }
93 return rn;
94 } else {
96 mp_size_t new_nfactors = 0;
97 ulongp restrict limbs = TALLOC_TYPE(nfactors / 2 + 1, ulong);
98 mp_limb_t t = 1;
99 mp_size_t limbn = 0;
100 for (mp_size_t i = 0; i < nfactors; ++i) {
101 uint f = fac[i].f;
102 uint j = fac[i].j;
103 if (j > 1) {
104 fac[new_nfactors].f = f;
105 fac[new_nfactors++].j = j >> 1;
106 }
107 if (j & 1) {
108 t *= f;
109 if (t > MP_UINT_MAX) {
110 limbs[limbn++] = t;
111 t = 1;
112 }
113 }
114 }
115 if (t != 1) {
116 limbs[limbn++] = t;
117 }
118
119 mp_ptr restrict mp = TALLOC_TYPE(limbn * 2, mp_limb_t);
120 mp_size_t mpn = 0;
121
122 if (new_nfactors > 0) {
123 if (limbn > 0) {
124 mpn = lmmp_elem_mul_ulong_(mp, limbs, limbn, mp + limbn);
125 lmmp_debug_assert(rn >= mpn);
126 mp_size_t tn = ((rn - mpn) >> 1) + 1;
127 // 这里根据mpn的大小估计剩余因子乘积的长度,额外分配两倍的tn,以进行平方。
128 mp_ptr restrict tp = BALLOC_TYPE(3 * tn + 3, mp_limb_t);
129 tn = lmmp_factors_mul_(tp, tn, fac, new_nfactors);
130
131 mp_ptr restrict tp2 = tp + tn + 1;
132 lmmp_sqr_(tp2, tp, tn);
133 tn <<= 1;
134 tn -= tp2[tn - 1] == 0;
135 MUL(dst, tp2, tn, mp, mpn);
136 rn = tn + mpn;
137 rn -= dst[rn - 1] == 0;
138 } else {
139 mp_size_t tn = (rn >> 1) + 1;
140 mp_ptr restrict tp = TALLOC_TYPE(tn, mp_limb_t);
141 tn = lmmp_factors_mul_(tp, tn, fac, new_nfactors);
142 lmmp_sqr_(dst, tp, tn);
143 rn = tn << 1;
144 rn -= dst[rn - 1] == 0;
145 }
146 } else {
147 lmmp_debug_assert(limbn > 0);
148 // 这里不能直接乘入dst,因为dst的大小可能小于limbn,导致溢出
149 rn = lmmp_elem_mul_ulong_(mp + limbn, limbs, limbn, mp + limbn);
150 lmmp_copy(dst, mp, rn);
151 }
152 TEMP_FREE;
153 return rn;
154 }
155}
156
157static inline void count_factors(fac_ptr fac, uint nfactors, uint n, uint p) {
158 uint pn = n;
159 uint e = 0;
160 while (pn > 0) {
161 pn /= p;
162 e += pn;
163 }
164 fac[nfactors].f = p;
165 fac[nfactors].j = e;
166}
167
171 uint nfactors = lmmp_prime_size_(n);
172 fac_ptr restrict fac = BALLOC_TYPE(nfactors, fac_t);
173 nfactors = 0;
174
175 prime_cache_t cache;
176 lmmp_prime_cache_init_(&cache, n);
177 while(cache.is_end == 0) {
179 for (uint i = 0; i < cache.size; i++) {
180 count_factors(fac, nfactors++, n, cache.pp[i]);
181 }
182 }
184
185 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
186
188 return rn;
189}
190
191mp_size_t lmmp_factorial_(mp_ptr restrict dst, mp_bitcnt_t bits, mp_size_t rn, uint n) {
192 mp_size_t shw = bits / LIMB_BITS;
193 bits %= LIMB_BITS;
194 lmmp_zero(dst, shw);
195 if (n <= NPR_SHORT_LIMIT)
196 rn = lmmp_odd_nPr_ushort_(dst + shw, rn - shw, n, n);
197 else
198 rn = lmmp_odd_factorial_uint_(dst + shw, rn - shw, n);
199
200 if (bits > 0) {
201 dst[shw + rn] = lmmp_shl_(dst + shw, dst + shw, rn, bits);
202 rn += shw + 1;
203 rn -= dst[rn - 1] == 0;
204 } else {
205 rn += shw;
206 }
207 return rn;
208}
mp_size_t lmmp_elem_mul_ulong_(mp_ptr dst, const ulongp limbs, mp_size_t n, mp_ptr tp)
计算limbs数组的累乘积
uint f
Definition ele_mul.h:118
uint j
Definition ele_mul.h:119
mp_size_t lmmp_factorial_(mp_ptr restrict dst, mp_bitcnt_t bits, mp_size_t rn, uint n)
Definition factorial.c:191
#define MUL(dst, ap, an, bp, bn)
Definition factorial.c:15
mp_size_t lmmp_odd_factorial_uint_(mp_ptr restrict dst, mp_size_t rn, uint n)
Definition factorial.c:168
static void count_factors(fac_ptr fac, uint nfactors, uint n, uint p)
Definition factorial.c:157
#define mul_1(dst, rn, value)
Definition factorial.c:21
mp_size_t lmmp_factors_mul_(mp_ptr restrict dst, mp_size_t rn, fac_ptr restrict fac, uint nfactors)
Definition factorial.c:42
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_copy(dst, src, n)
Definition lmmp.h:364
#define lmmp_zero(dst, n)
Definition lmmp.h:366
size_t mp_bitcnt_t
Definition lmmp.h:217
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define LIMB_BITS
Definition lmmp.h:221
#define lmmp_param_assert(x)
Definition lmmp.h:398
void lmmp_sqr_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
大数平方操作 [dst,2*na] = [numa,na]^2
Definition sqr.c:10
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
int lmmp_limb_popcnt_(mp_limb_t x)
计算一个64位无符号整数中1的个数
Definition tiny.c:20
#define p2
#define p4
#define NPR_SHORT_LIMIT
Definition mparam.h:154
#define MP_UINT_MAX
Definition mparam.h:139
#define MP_USHORT_MAX
Definition mparam.h:138
#define LOG2_
Definition mparam.h:165
#define tp
#define tp2
mp_size_t lmmp_odd_nPr_ushort_(mp_ptr dst, mp_size_t rn, ulong n, ulong r)
计算 nPr 排列数的奇数部分
uint64_t * ulongp
Definition numth.h:45
uint32_t uint
Definition numth.h:35
uint64_t ulong
Definition numth.h:36
void lmmp_prime_cache_free_(prime_cache_t *cache)
释放素数表缓存
ulong lmmp_prime_size_(ulong n)
估计 n 范围内的素数数量
Definition prime_table.c:11
void lmmp_prime_cache_next_(prime_cache_t *cache)
素数表缓存更新(从小到大遍历全局质数表)
void lmmp_prime_int_table_init_(uint n)
初始化全局素数表
Definition prime_table.c:70
void lmmp_prime_cache_init_(prime_cache_t *cache, uint n)
初始化素数表缓存
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define TALLOC_TYPE(n, type)
Definition tmp_alloc.h:91
#define TEMP_B_DECL
Definition tmp_alloc.h:75
#define BALLOC_TYPE(n, type)
Definition tmp_alloc.h:89
#define TEMP_B_FREE
Definition tmp_alloc.h:100

◆ mul_1

#define mul_1 (   dst,
  rn,
  value 
)
值:
dst[rn] = lmmp_mul_1_(dst, dst, rn, value); \
++rn; \
rn -= dst[rn - 1] == 0
mp_limb_t lmmp_mul_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数乘以单limb操作 [dst,na] = [numa,na] * x

在文件 factorial.c21 行定义.

函数说明

◆ count_factors()

static void count_factors ( fac_ptr  fac,
uint  nfactors,
uint  n,
uint  p 
)
inlinestatic

在文件 factorial.c157 行定义.

157 {
158 uint pn = n;
159 uint e = 0;
160 while (pn > 0) {
161 pn /= p;
162 e += pn;
163 }
164 fac[nfactors].f = p;
165 fac[nfactors].j = e;
166}

引用了 fac_t::f , 以及 fac_t::j.

被这些函数引用 lmmp_odd_factorial_uint_().

+ 这是这个函数的调用关系图:

◆ lmmp_factorial_()

mp_size_t lmmp_factorial_ ( mp_ptr restrict  dst,
mp_bitcnt_t  bits,
mp_size_t  rn,
uint  n 
)

在文件 factorial.c191 行定义.

191 {
192 mp_size_t shw = bits / LIMB_BITS;
193 bits %= LIMB_BITS;
194 lmmp_zero(dst, shw);
195 if (n <= NPR_SHORT_LIMIT)
196 rn = lmmp_odd_nPr_ushort_(dst + shw, rn - shw, n, n);
197 else
198 rn = lmmp_odd_factorial_uint_(dst + shw, rn - shw, n);
199
200 if (bits > 0) {
201 dst[shw + rn] = lmmp_shl_(dst + shw, dst + shw, rn, bits);
202 rn += shw + 1;
203 rn -= dst[rn - 1] == 0;
204 } else {
205 rn += shw;
206 }
207 return rn;
208}

引用了 LIMB_BITS, lmmp_odd_factorial_uint_(), lmmp_odd_nPr_ushort_(), lmmp_shl_(), lmmp_zero , 以及 NPR_SHORT_LIMIT.

+ 函数调用图:

◆ lmmp_factorial_size_()

mp_size_t lmmp_factorial_size_ ( uint  n,
mp_bitcnt_t *restrict  bits 
)

在文件 factorial.c26 行定义.

26 {
27 double ln_fact = lgamma(n + 1.0);
28 double log2_fact = ln_fact / LOG2_;
29 mp_size_t rn = ceil(log2_fact / LIMB_BITS) + 2; /* more two limbs */
30 *bits = n - lmmp_limb_popcnt_(n);
31 return rn;
32}

引用了 LIMB_BITS, lmmp_limb_popcnt_() , 以及 LOG2_.

+ 函数调用图:

◆ lmmp_factors_mul_()

mp_size_t lmmp_factors_mul_ ( mp_ptr restrict  dst,
mp_size_t  rn,
fac_ptr restrict  fac,
uint  nfactors 
)

在文件 factorial.c42 行定义.

42 {
43 lmmp_param_assert(dst != NULL && fac != NULL);
44 lmmp_param_assert(rn > 0 && nfactors > 0);
45 if (nfactors <= 20) {
46 // 绝大多数情况下,大的质因数的指数都很小,所以这里只需要考虑小的质因数。
47 // 且随着递归的进行,进入这一步的质因数通常都很小。几乎不可能触发debug_assert
48 dst[0] = 1;
49 rn = 1;
50 for (uint i = 0; i < nfactors; i++) {
51 ulong f = fac[i].f;
52 uint j = fac[i].j;
53 lmmp_debug_assert(j != 0);
54 switch (j) {
55 case 1:
56 mul_1(dst, rn, f);
57 break;
58 case 2:
60 mul_1(dst, rn, f * f);
61 break;
62 case 3:
63 lmmp_debug_assert(f <= 2642245); // 2642245 = floor(B^(1/3))
64 mul_1(dst, rn, f * f * f);
65 break;
66 case 4:
68 mul_1(dst, rn, f * f * f * f);
69 break;
70 default:
72 ulong p2 = f * f;
73 ulong p4 = p2 * p2;
74 for (uint i = 0; i < j - 3; i += 4) {
75 mul_1(dst, rn, p4);
76 }
77 switch (j % 4) {
78 case 1:
79 mul_1(dst, rn, f);
80 break;
81 case 2:
82 mul_1(dst, rn, p2);
83 break;
84 case 3:
85 mul_1(dst, rn, p2 * f);
86 break;
87 default:
88 break;
89 }
90 break;
91 }
92 }
93 return rn;
94 } else {
96 mp_size_t new_nfactors = 0;
97 ulongp restrict limbs = TALLOC_TYPE(nfactors / 2 + 1, ulong);
98 mp_limb_t t = 1;
99 mp_size_t limbn = 0;
100 for (mp_size_t i = 0; i < nfactors; ++i) {
101 uint f = fac[i].f;
102 uint j = fac[i].j;
103 if (j > 1) {
104 fac[new_nfactors].f = f;
105 fac[new_nfactors++].j = j >> 1;
106 }
107 if (j & 1) {
108 t *= f;
109 if (t > MP_UINT_MAX) {
110 limbs[limbn++] = t;
111 t = 1;
112 }
113 }
114 }
115 if (t != 1) {
116 limbs[limbn++] = t;
117 }
118
119 mp_ptr restrict mp = TALLOC_TYPE(limbn * 2, mp_limb_t);
120 mp_size_t mpn = 0;
121
122 if (new_nfactors > 0) {
123 if (limbn > 0) {
124 mpn = lmmp_elem_mul_ulong_(mp, limbs, limbn, mp + limbn);
125 lmmp_debug_assert(rn >= mpn);
126 mp_size_t tn = ((rn - mpn) >> 1) + 1;
127 // 这里根据mpn的大小估计剩余因子乘积的长度,额外分配两倍的tn,以进行平方。
128 mp_ptr restrict tp = BALLOC_TYPE(3 * tn + 3, mp_limb_t);
129 tn = lmmp_factors_mul_(tp, tn, fac, new_nfactors);
130
131 mp_ptr restrict tp2 = tp + tn + 1;
132 lmmp_sqr_(tp2, tp, tn);
133 tn <<= 1;
134 tn -= tp2[tn - 1] == 0;
135 MUL(dst, tp2, tn, mp, mpn);
136 rn = tn + mpn;
137 rn -= dst[rn - 1] == 0;
138 } else {
139 mp_size_t tn = (rn >> 1) + 1;
140 mp_ptr restrict tp = TALLOC_TYPE(tn, mp_limb_t);
141 tn = lmmp_factors_mul_(tp, tn, fac, new_nfactors);
142 lmmp_sqr_(dst, tp, tn);
143 rn = tn << 1;
144 rn -= dst[rn - 1] == 0;
145 }
146 } else {
147 lmmp_debug_assert(limbn > 0);
148 // 这里不能直接乘入dst,因为dst的大小可能小于limbn,导致溢出
149 rn = lmmp_elem_mul_ulong_(mp + limbn, limbs, limbn, mp + limbn);
150 lmmp_copy(dst, mp, rn);
151 }
152 TEMP_FREE;
153 return rn;
154 }
155}

引用了 BALLOC_TYPE, lmmp_copy, lmmp_debug_assert, lmmp_elem_mul_ulong_(), lmmp_factors_mul_(), lmmp_param_assert, lmmp_sqr_(), MP_UINT_MAX, MP_USHORT_MAX, MUL, mul_1, p2, p4, TALLOC_TYPE, TEMP_DECL, TEMP_FREE, tp , 以及 tp2.

被这些函数引用 lmmp_factors_mul_() , 以及 lmmp_odd_factorial_uint_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_odd_factorial_uint_()

mp_size_t lmmp_odd_factorial_uint_ ( mp_ptr restrict  dst,
mp_size_t  rn,
uint  n 
)

在文件 factorial.c168 行定义.

168 {
171 uint nfactors = lmmp_prime_size_(n);
172 fac_ptr restrict fac = BALLOC_TYPE(nfactors, fac_t);
173 nfactors = 0;
174
175 prime_cache_t cache;
176 lmmp_prime_cache_init_(&cache, n);
177 while(cache.is_end == 0) {
179 for (uint i = 0; i < cache.size; i++) {
180 count_factors(fac, nfactors++, n, cache.pp[i]);
181 }
182 }
184
185 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
186
188 return rn;
189}

引用了 BALLOC_TYPE, count_factors(), prime_cache_t::is_end, lmmp_factors_mul_(), lmmp_prime_cache_free_(), lmmp_prime_cache_init_(), lmmp_prime_cache_next_(), lmmp_prime_int_table_init_(), lmmp_prime_size_(), prime_cache_t::pp, prime_cache_t::size, TEMP_B_DECL , 以及 TEMP_B_FREE.

被这些函数引用 lmmp_factorial_().

+ 函数调用图:
+ 这是这个函数的调用关系图: