LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
nPr.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../../include/lammp/impl/ele_mul.h"
8#include "../../../include/lammp/impl/mparam.h"
9#include "../../../include/lammp/impl/prime_table.h"
10#include "../../../include/lammp/impl/longlong.h"
11
12#define mul_1(dst, rn, v) \
13 dst[rn] = lmmp_mul_1_(dst, dst, rn, v); \
14 ++rn; \
15 rn -= dst[rn - 1] == 0 ? 1 : 0
16
17static const ulong odd_factorial[25] = {1, 1, 3, 3, 15, 45, 315, 315,
18 2835, 14175, 155925,
19 467775, 6081075, 42567525,
20 638512875, 638512875, 10854718875, 97692469875,
21 1856156927625, 9280784638125, 194896477400625,
22 2143861251406875, 49308808782358125,
23 147926426347074375ull, 3698160658676859375ull};
24
26 mp_size_t shl = n - lmmp_limb_popcnt_(n);
27 shl -= (n - r) - lmmp_limb_popcnt_(n - r);
28 *bits = shl;
29 if (n < DBL_2POW_MANT_DIG_) {
30 double ln_perm = lgamma(n + 1.0) - lgamma(n - r + 1.0);
31 double log2_perm = ln_perm / LOG2_;
32 mp_size_t rn = ceil(log2_perm / LIMB_BITS) + 2; /* more two limbs */
33 return rn;
34 } else {
35 // nPr = n! / (n-r)! < n^r
36 return lmmp_pow_1_size_(n, r);
37 }
38}
39
40static inline uint count_factors(fac_ptr fac, uint nfactors, uint n, uint r, uint p) {
41 uint pn = n;
42 uint e = 0;
43 ulong inv = MP_ULONG_MAX / p + 1;
44 while (pn > 0) {
45 _udiv32by32_q_preinv(pn, pn, inv);
46 e += pn;
47 }
48 pn = r;
49 while (pn > 0) {
50 _udiv32by32_q_preinv(pn, pn, inv);
51 e -= pn;
52 }
53 if (e > 0) {
54 fac[nfactors].f = p;
55 fac[nfactors++].j = e;
56 }
57 return nfactors;
58}
59
60/**
61 * @brief 使用累乘函数计算nPr(奇数部分)
62 */
63static mp_size_t lmmp_odd_nPr_product_(mp_ptr restrict dst, mp_size_t rn, uint n, uint r) {
65 ulongp restrict limbs = TALLOC_TYPE(r / 2 + 1, ulong);
66 mp_size_t limbn = 0;
67 ulong t = 1, v;
68 mp_bitcnt_t cnt = 0;
69 for (ulong i = n - r + 1; i <= n; ++i) {
70 ctz_shl(v, i, cnt);
71 t *= v;
72 if (t > MP_UINT_MAX) {
73 limbs[limbn++] = t;
74 t = 1;
75 }
76 }
77 if (t != 1)
78 limbs[limbn++] = t;
79
80 mp_ptr restrict tp = TALLOC_TYPE(limbn * 2, mp_limb_t);
81 // 这里不能直接乘入dst,因为dst的大小可能小于limbn,导致溢出
82 rn = lmmp_elem_mul_ulong_(tp, limbs, limbn, tp + limbn);
83 lmmp_copy(dst, tp, rn);
85 return rn;
86}
87
89 lmmp_param_assert(n >= r);
91 if (n < ODD_FACTORIAL_SIZE) {
92 if (n == 0) {
93 dst[0] = 1;
94 } else if (n == r) {
95 dst[0] = odd_factorial[n - 1];
96 } else {
97 dst[0] = odd_factorial[n - 1] / odd_factorial[n - r - 1];
98 }
99 return 1;
100 } else if (r <= 10) {
101 dst[0] = 1;
102 rn = 1;
103 ulong t = 1, v;
104 ulong i = n - r + 1;
105 mp_bitcnt_t cnt = 0;
106 lmmp_debug_assert(n >= 3);
107 for (; i <= (ulong)n - 3; i += 3) {
108 t = i * (i + 1) * (i + 2);
109 ctz_shl(v, t, cnt);
110 mul_1(dst, rn, v);
111 }
112 t = 1;
113 for (; i <= n; ++i) {
114 t *= i;
115 }
116 ctz_shl(v, t, cnt);
117 if (v != 1) {
118 mul_1(dst, rn, v);
119 }
120 return rn;
121 } else if (n <= MP_UCHAR_MAX) {
122 lmmp_debug_assert(n >= 7);
123 lmmp_debug_assert(r >= 2);
124 dst[0] = 1;
125 rn = 1;
126 ulong t = 0, v;
127 ulong i = n - r + 1;
128 mp_bitcnt_t cnt;
129 for (; i <= (ulong)n - 7; i += 7) {
130 t = i * (i + 1) * (i + 2) * (i + 3) * (i + 4) * (i + 5) * (i + 6);
131 ctz_shl(v, t, cnt);
132 mul_1(dst, rn, v);
133 }
134 t = 1;
135 for (; i <= n; ++i) {
136 t *= i;
137 }
138 ctz_shl(v, t, cnt);
139 if (v != 1) {
140 mul_1(dst, rn, v);
141 }
142 return rn;
143 } else if (n <= 0xfff) {
145 ulongp restrict limbs = SALLOC_TYPE(r / 5 + 1, ulong);
146 mp_size_t limbn = 0;
147 ulong t, v;
148 ulong i = n - r + 1;
149 mp_bitcnt_t cnt;
150 lmmp_debug_assert(n >= 5);
151 for (; i <= (ulong)n - 5; i += 5) {
152 t = i * (i + 1) * (i + 2) * (i + 3) * (i + 4);
153 ctz_shl(v, t, cnt);
154 limbs[limbn++] = v;
155 }
156 t = 1;
157 for (; i <= n; ++i) {
158 t *= i;
159 }
160 ctz_shl(v, t, cnt);
161 if (v != 1)
162 limbs[limbn++] = v;
163 mp_ptr restrict tp = SALLOC_TYPE(limbn * 2, mp_limb_t);
164 // 这里不能直接乘入dst,因为dst的大小可能小于limbn,导致溢出
165 rn = lmmp_elem_mul_ulong_(tp, limbs, limbn, tp + limbn);
166 lmmp_copy(dst, tp, rn);
168 return rn;
169 } else if (rn < PERMUTATION_USHORT_MUL_THRESHOLD || n >= (PERMUTATION_USHORT_TIMES_THRESHOLD * r)) {
170 return lmmp_odd_nPr_product_(dst, rn, n, r);
171 } else {
172 TEMP_DECL;
173 uint primen = lmmp_prime_cnt16_(n);
174 uint nfactors = primen;
175 fac_ptr restrict fac = TALLOC_TYPE(nfactors, fac_t);
176 r = n - r;
177 nfactors = 0;
178 for (uint i = 1; i < primen; ++i) {
179 uint p = prime_short_table[i];
180 nfactors = count_factors(fac, nfactors, n, r, p);
181 }
182
183 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
184
185 TEMP_FREE;
186 return rn;
187 }
188}
189
191 lmmp_param_assert(n >= r);
193 if (r <= 10) {
194 dst[0] = 1;
195 rn = 1;
196 ulong v;
197 mp_bitcnt_t cnt;
198 for (ulong i = n - r + 1; i <= n; ++i) {
199 ctz_shl(v, i, cnt);
200 mul_1(dst, rn, v);
201 }
202 return rn;
203 } else if (rn < PERMUTATION_UINT_MUL_THRESHOLD || n >= (PERMUTATION_UINT_TIMES_THRESHOLD * r)) {
204 return lmmp_odd_nPr_product_(dst, rn, n, r);
205 } else{
207
209 uint nfactors = lmmp_prime_size_(n);
210 fac_ptr restrict fac = BALLOC_TYPE(nfactors, fac_t);
211 r = n - r;
212 nfactors = 0;
213
214 prime_cache_t cache;
215 lmmp_prime_cache_init_(&cache, n);
216 while(cache.is_end == 0) {
218 for (uint i = 0; i < cache.size; ++i) {
219 nfactors = count_factors(fac, nfactors, n, r, cache.pp[i]);
220 }
221 }
223
224 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
225
227 return rn;
228 }
229}
230
232 lmmp_param_assert(n >= r);
233 TEMP_DECL;
234 ulongp restrict limbs = TALLOC_TYPE(r + 1, ulong);
235 mp_size_t limbn = 0;
236 ulong t, v, m = 1;
237 mp_bitcnt_t cnt;
238 for (ulong i = 1; i <= r; ++i) {
239 t = n - r + i;
240 ctz_shl(v, t, cnt);
241 if (v != 1)
242 limbs[limbn++] = v;
243 }
244 if (m != 1)
245 limbs[limbn++] = m;
246
247 mp_ptr restrict tp = TALLOC_TYPE(limbn * 2, mp_limb_t);
248 // 这里不能直接乘入dst,因为dst的大小可能小于limbn,导致溢出
249 rn = lmmp_elem_mul_ulong_(tp, limbs, limbn, tp + limbn);
250 lmmp_copy(dst, tp, rn);
251 TEMP_FREE;
252 return rn;
253}
254
256 lmmp_debug_assert(n >= r);
257 mp_size_t shw = bits / LIMB_BITS;
258 bits %= LIMB_BITS;
259 lmmp_zero(dst, shw);
260 if (n <= NPR_SHORT_LIMIT)
261 rn = lmmp_odd_nPr_ushort_(dst + shw, rn - shw, n, r);
262 else if (n <= NPR_INT_LIMIT)
263 rn = lmmp_odd_nPr_uint_(dst + shw, rn - shw, n, r);
264 else
265 rn = lmmp_odd_nPr_ulong_(dst + shw, rn - shw, n, r);
266
267 if (bits > 0) {
268 dst[shw + rn] = lmmp_shl_(dst + shw, dst + shw, rn, bits);
269 rn += shw + 1;
270 rn -= dst[rn - 1] == 0;
271 } else {
272 rn += shw;
273 }
274 return rn;
275}
mp_size_t lmmp_elem_mul_ulong_(mp_ptr dst, const ulongp limbs, mp_size_t n, mp_ptr tp)
计算limbs数组的累乘积
uint f
Definition ele_mul.h:118
uint j
Definition ele_mul.h:119
mp_size_t lmmp_factors_mul_(mp_ptr dst, mp_size_t rn, fac_ptr fac, uint nfactors)
计算因子的累乘,并将结果放入dst中
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_copy(dst, src, n)
Definition lmmp.h:364
#define lmmp_zero(dst, n)
Definition lmmp.h:366
size_t mp_bitcnt_t
Definition lmmp.h:217
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define LIMB_BITS
Definition lmmp.h:221
#define lmmp_param_assert(x)
Definition lmmp.h:398
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
int lmmp_limb_popcnt_(mp_limb_t x)
计算一个64位无符号整数中1的个数
Definition tiny.c:20
#define ctz_shl(r, x, cnt)
Definition longlong.h:229
#define _udiv32by32_q_preinv(q, n0, dinv)
Definition longlong.h:466
#define NPR_SHORT_LIMIT
Definition mparam.h:154
#define NPR_INT_LIMIT
Definition mparam.h:155
#define PERMUTATION_UINT_TIMES_THRESHOLD
Definition mparam.h:122
#define MP_UINT_MAX
Definition mparam.h:139
#define MP_UCHAR_MAX
Definition mparam.h:137
#define DBL_2POW_MANT_DIG_
Definition mparam.h:168
#define LOG2_
Definition mparam.h:165
#define MP_ULONG_MAX
Definition mparam.h:140
#define ODD_FACTORIAL_SIZE
Definition mparam.h:152
#define PERMUTATION_USHORT_TIMES_THRESHOLD
Definition mparam.h:119
#define tp
mp_size_t lmmp_nPr_size_(ulong n, ulong r, mp_bitcnt_t *restrict bits)
Definition nPr.c:25
#define mul_1(dst, rn, v)
Definition nPr.c:12
static mp_size_t lmmp_odd_nPr_product_(mp_ptr restrict dst, mp_size_t rn, uint n, uint r)
使用累乘函数计算nPr(奇数部分)
Definition nPr.c:63
mp_size_t lmmp_odd_nPr_ulong_(mp_ptr restrict dst, mp_size_t rn, ulong n, ulong r)
Definition nPr.c:231
mp_size_t lmmp_odd_nPr_ushort_(mp_ptr restrict dst, mp_size_t rn, ulong n, ulong r)
Definition nPr.c:88
mp_size_t lmmp_odd_nPr_uint_(mp_ptr restrict dst, mp_size_t rn, ulong n, ulong r)
Definition nPr.c:190
static const ulong odd_factorial[25]
Definition nPr.c:17
static uint count_factors(fac_ptr fac, uint nfactors, uint n, uint r, uint p)
Definition nPr.c:40
mp_size_t lmmp_nPr_(mp_ptr restrict dst, mp_bitcnt_t bits, mp_size_t rn, ulong n, ulong r)
Definition nPr.c:255
uint64_t * ulongp
Definition numth.h:45
uint32_t uint
Definition numth.h:35
static mp_size_t lmmp_pow_1_size_(mp_limb_t base, ulong exp)
计算幂次方需要的limb缓冲区长度 base ^ exp
Definition numth.h:264
uint64_t ulong
Definition numth.h:36
void lmmp_prime_cache_free_(prime_cache_t *cache)
释放素数表缓存
ulong lmmp_prime_size_(ulong n)
估计 n 范围内的素数数量
Definition prime_table.c:11
void lmmp_prime_cache_next_(prime_cache_t *cache)
素数表缓存更新(从小到大遍历全局质数表)
const ushort prime_short_table[6542]
void lmmp_prime_int_table_init_(uint n)
初始化全局素数表
Definition prime_table.c:70
ushort lmmp_prime_cnt16_(ushort n)
计算小于等于 n 的素数数量
void lmmp_prime_cache_init_(prime_cache_t *cache, uint n)
初始化素数表缓存
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TALLOC_TYPE(n, type)
Definition tmp_alloc.h:91
#define TEMP_B_DECL
Definition tmp_alloc.h:75
#define BALLOC_TYPE(n, type)
Definition tmp_alloc.h:89
#define TEMP_S_FREE
Definition tmp_alloc.h:105
#define TEMP_B_FREE
Definition tmp_alloc.h:100