LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
sqrt.c 文件参考
+ sqrt.c 的引用(Include)关系图:

浏览源代码.

函数

static void lmmp_invsqrt_newton_ (mp_ptr dstis, mp_size_t ns, mp_srcptr numa, mp_size_t na)
 
void lmmp_sqrt_ (mp_ptr dsts, mp_ptr dstr, mp_srcptr numa, mp_size_t na, mp_size_t nf)
 大数平方根和取余操作
 
static mp_limb_t lmmp_sqrt_1_ (mp_ptr dsts, mp_limb_t x)
 
static mp_limb_t lmmp_sqrt_2_ (mp_ptr dsts, mp_ptr dstr, mp_srcptr numa)
 
static mp_limb_t lmmp_sqrt_divide_ (mp_ptr dsts, mp_ptr numa, mp_size_t ns, int nsh)
 
static void lmmp_sqrt_newton_ (mp_ptr dsts, mp_srcptr numa, mp_size_t na, mp_size_t nf)
 

变量

static const mp_byte_t lmmp_invsqrt_table_ []
 

函数说明

◆ lmmp_invsqrt_newton_()

static void lmmp_invsqrt_newton_ ( mp_ptr  dstis,
mp_size_t  ns,
mp_srcptr  numa,
mp_size_t  na 
)
static

在文件 sqrt.c147 行定义.

147 {
148 lmmp_param_assert(ns >= 3);
149 lmmp_param_assert(na > 0);
150 lmmp_param_assert(numa[na - 1] >= LIMB_B_4);
151 mp_size_t nr = ns, namax = na, mn;
152 mp_size_t sizes[LIMB_BITS], *sizp = sizes;
153
154 do {
155 *sizp = nr;
156 nr = (nr >> 1) + 1;
157 ++sizp;
158 } while (nr > 2);
159
160 numa += na;
161 dstis += ns;
162
163 // nr=2
164 // i2=floor((B^5-1)/(1+floor(sqrt(x*B^4))))
165 mp_limb_t numa2[6], sval[3];
166 lmmp_zero(numa2, 4);
167 numa2[5] = numa[-1];
168 if (na > 1)
169 numa2[4] = numa[-2];
170 else
171 numa2[4] = 0;
172 lmmp_sqrt_divide_(sval, numa2, 3, 0);
173 lmmp_inc(sval);
174 for (mp_size_t i = 0; i < 5; ++i) numa2[i] = LIMB_MAX;
175 dstis[0] = lmmp_div_s_(dstis - 2, numa2, 5, sval, 3);
176
177 TEMP_DECL;
178 mp_limb_t alloc_size = na + 2 * ns + 6;
179 mp_ptr xp = TALLOC_TYPE(alloc_size, mp_limb_t);
180 do {
181 na = *--sizp;
182
183 // ar = 0:[numa-nr,nr]
184 // an = 0:[numa-na,na]
185 // ir = 1:[dst-nr,nr] = floor(B^(3*nr/2)/sqrt(ar)) - [0|1]
186 // d = B^(na+2*nr)-an*ir*ir
187 // -4*B^(na+nr) < d < 4*B^(na+nr)
188
189 mp_size_t naz = LMMP_MIN(na, namax);
190 //mp_size_t zeros = na - naz;
191 mp_size_t nsqr, nres = naz + nr + 1;
192 mp_ptr dp = xp + 2 * nr + 1, dip = xp + nr + 1;
193 int cmod; // 1=mod b^mn-1, 0=mod b^(naz+nr+1)
194 int sign; // 1:d<0, 0:d>=0
195 mn = lmmp_fft_next_size_(nres);
196
197 // ir^2
198 if (2 * SQRT_NEWTON_MODM_THRESHOLD + mn >= nr * 2 + 1) {
199 cmod = 0;
200 lmmp_sqr_(xp, dstis - nr, nr + 1);
201 nsqr = 2 * nr + 1;
202 } else {
203 cmod = 1;
204 lmmp_mul_mersenne_(xp, mn, dstis - nr, nr + 1, dstis - nr, nr + 1);
205 nsqr = mn;
206 }
207
208 // ir^2*an
209 if (naz < SQRT_NEWTON_MODM_THRESHOLD || naz * 8 < nsqr || mn >= nsqr + naz) {
210 if (cmod == 0)
211 nsqr = LMMP_MIN(nsqr, nres);
212 lmmp_mul_(dp, xp, nsqr, numa - naz, naz);
213 if (cmod == 1) {
214 if (lmmp_add_(dp, dp, mn, dp + mn, naz))
215 lmmp_inc(dp);
216 }
217 } else {
218 if (nsqr > mn) { // cmod==0
219 if (lmmp_add_(xp, xp, mn, xp + mn, nsqr - mn))
220 lmmp_inc(xp);
221 }
222 lmmp_mul_mersenne_(dp, mn, xp, nsqr, numa - naz, naz);
223 cmod = 1;
224 }
225
226 if (cmod == 1) {
227 // naz+nr < mn <= naz+2*nr
228 //[dp,mn] -= B^(naz+2*nr) mod (B^mn-1)
229 dp[mn] = 1;
230 lmmp_dec(dp + naz + 2 * nr - mn);
231 if (dp[mn] == 0)
232 lmmp_dec(dp);
233 }
234
235 if (dp[nres - 1] > 3) { //-d<0
236 if (cmod == 0)
237 lmmp_dec(dp); // for neg to not
238 // else (neg to not) compensate (mod transfer)
239 dp += naz;
240 lmmp_shlnot_(xp, dp + 1, nr, LIMB_BITS - 1);
241 xp[0] ^= dp[0] >> 1;
242 xp[nr] = ~dp[nr] >> 1;
243 sign = 0;
244 } else { //-d>0
245 lmmp_shr_(xp, dp + naz, nr + 1, 1);
246 if ((dp[naz] & 1) || !lmmp_zero_q_(dp, naz))
247 lmmp_inc(xp);
248 sign = 1;
249 }
250
251 lmmp_mul_n_(dip, xp, dstis - nr, nr + 1);
252
253 if (sign) {
254 if (lmmp_zero_q_(dip, 3 * nr - na)) {
255 // a limit for dec
256 dip[2 * nr + 1] = 1;
257 lmmp_dec(dip + 3 * nr - na);
258 }
259 lmmp_not_(dstis - na, dip + 3 * nr - na, na - nr);
260 lmmp_dec_1(dstis - nr, dip[2 * nr] + 1);
261 } else {
262 lmmp_copy(dstis - na, dip + 3 * nr - na, na - nr);
263 lmmp_inc_1(dstis - nr, dip[2 * nr]);
264 }
265
266 nr = na;
267 } while (sizp != sizes);
268 TEMP_FREE;
269}
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_copy(dst, src, n)
Definition lmmp.h:364
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
#define LIMB_MAX
Definition lmmp.h:224
uint64_t mp_limb_t
Definition lmmp.h:211
#define LMMP_MIN(l, o)
Definition lmmp.h:348
#define LIMB_BITS
Definition lmmp.h:221
#define lmmp_param_assert(x)
Definition lmmp.h:398
mp_limb_t lmmp_shlnot_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
左移后按位取反操作 [dst,na] = ~([numa,na] << shl),dst的低shl位填充1
mp_limb_t lmmp_div_s_(mp_ptr dstq, mp_ptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
除法运算
Definition div.c:11
void lmmp_mul_mersenne_(mp_ptr dst, mp_size_t rn, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
梅森数模乘法 [dst,rn] = [numa,na]*[numb,nb] mod B^rn-1
Definition mul_fft.c:752
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
#define lmmp_dec(p)
大数减1宏(预期无借位)
Definition lmmpn.h:973
#define lmmp_inc(p)
大数加1宏(预期无进位)
Definition lmmpn.h:946
mp_limb_t lmmp_shr_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shr)
大数右移操作 [dst,na] = [numa,na] >> shr,dst的高shr位填充0
Definition shr.c:9
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_sqr_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
大数平方操作 [dst,2*na] = [numa,na]^2
Definition sqr.c:10
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
mp_size_t lmmp_fft_next_size_(mp_size_t n)
计算满足 >=n 的最小费马/梅森乘法可行尺寸
Definition mul_fft.c:84
#define lmmp_dec_1(p, dec)
大数减指定值宏(预期无借位)
Definition lmmpn.h:985
void lmmp_not_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
大数按位取反操作 [dst,na] = ~[numa,na] (对每个limb执行按位非操作)
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define LIMB_B_4
Definition mparam.h:162
#define SQRT_NEWTON_MODM_THRESHOLD
Definition mparam.h:43
static mp_limb_t lmmp_sqrt_divide_(mp_ptr dsts, mp_ptr numa, mp_size_t ns, int nsh)
Definition sqrt.c:107
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define TALLOC_TYPE(n, type)
Definition tmp_alloc.h:91

引用了 LIMB_B_4, LIMB_BITS, LIMB_MAX, lmmp_add_(), lmmp_copy, lmmp_dec, lmmp_dec_1, lmmp_div_s_(), lmmp_fft_next_size_(), lmmp_inc, lmmp_inc_1, LMMP_MIN, lmmp_mul_(), lmmp_mul_mersenne_(), lmmp_mul_n_(), lmmp_not_(), lmmp_param_assert, lmmp_shlnot_(), lmmp_shr_(), lmmp_sqr_(), lmmp_sqrt_divide_(), lmmp_zero, lmmp_zero_q_(), SQRT_NEWTON_MODM_THRESHOLD, TALLOC_TYPE, TEMP_DECL , 以及 TEMP_FREE.

被这些函数引用 lmmp_sqrt_newton_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_sqrt_()

void lmmp_sqrt_ ( mp_ptr  dsts,
mp_ptr  dstr,
mp_srcptr  numa,
mp_size_t  na,
mp_size_t  nf 
)

大数平方根和取余操作

注解
如果dstr不为NULL: [dsts,nf+na/2+1], [dstr,nf+na/2+1] = sqrtrem([numa,na]*B^(2*nf)) 也即 [numa,na] × B^(2×nf) = [dsts,nf+na/2+1]^2 + [dstr,nf+na/2+1] 且 0 <= [dstr,nf+na/2+1] < 2 * [dsts,nf+na/2+1] + 1 如果dstr为NULL: [dsts,nf+na/2+1] = [round|floor](sqrt([numa,na]*B^(2*nf)))
警告
na>0, numa[na-1]!=0, eqsep(dsts,numa), eqsep(dstr,numa)
参数
dsts平方根结果输出指针
dstr余数结果输出指针(NULL表示不计算余数)
numa源操作数指针
na操作数的 limb 长度
nf精度因子

在文件 sqrt.c323 行定义.

323 {
324 lmmp_debug_assert(na > 0);
325 lmmp_debug_assert(numa[na - 1] > 0);
326 mp_limb_t high = numa[na - 1];
327 int nsh = lmmp_leading_zeros_(high) / 2;
328 mp_size_t nl = na + 2 * nf;
329 if (nl == 1) {
330 mp_limb_t srt;
331 lmmp_sqrt_1_(&srt, high << nsh * 2);
332 srt >>= nsh;
333 dsts[0] = srt;
334 if (dstr)
335 dstr[0] = high - srt * srt;
336 } else if (!dstr && nf >= 10 * na + SQRT_NEWTON_THRESHOLD) {
337 lmmp_sqrt_newton_(dsts, numa, na, nf);
338 } else {
339 TEMP_DECL;
340 mp_limb_t ns = (nl + 1) / 2;
341 mp_ptr numa2 = TALLOC_TYPE(2 * ns, mp_limb_t);
342 if (nf)
343 lmmp_zero(numa2, 2 * nf);
344 if (nsh)
345 lmmp_shl_(numa2 + 2 * ns - na, numa, na, nsh * 2);
346 else
347 lmmp_copy(numa2 + 2 * ns - na, numa, na);
348 if (nl & 1) {
349 numa2[2 * nf] = 0;
350 nsh += LIMB_BITS / 2;
351 } else {
352 dsts[ns] = 0;
353 }
354 mp_limb_t rh = lmmp_sqrt_divide_(dsts, numa2, ns, dstr ? 0 : nsh);
355 if (nsh) {
356 if (dstr) {
357 mp_limb_t ds = dsts[0] & (((mp_limb_t)1 << nsh) - 1);
358 rh += lmmp_addmul_1_(numa2, dsts, ns, 2 * ds);
359 mp_limb_t b = lmmp_submul_1_(numa2, &ds, 1, ds);
360 if (ns == 1)
361 rh -= b;
362 else
363 rh -= lmmp_sub_1_(numa2 + 1, numa2 + 1, ns - 1, b);
364 }
365 lmmp_shr_(dsts, dsts, ns, nsh);
366 }
367 if (dstr) {
368 numa2[ns] = rh;
369 nsh *= 2;
370 if (nsh >= LIMB_BITS) {
371 nsh -= LIMB_BITS;
372 ++numa2;
373 } else
374 ++ns;
375 if (nsh)
376 lmmp_shr_(dstr, numa2, ns, nsh);
377 else
378 lmmp_copy(dstr, numa2, ns);
379 }
380
381 TEMP_FREE;
382 }
383}
#define lmmp_debug_assert(x)
Definition lmmp.h:387
int lmmp_leading_zeros_(mp_limb_t x)
计算一个单精度数(limb)中前导零的个数
Definition tiny.c:35
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
mp_limb_t lmmp_addmul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累加操作 [numa,n] += [numb,n] * b
static mp_limb_t lmmp_sub_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数减单精度数静态内联函数 [dst,na]=[numa,na]-x
Definition lmmpn.h:1122
mp_limb_t lmmp_submul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累减操作 [numa,n] -= [numb,n] * b
#define SQRT_NEWTON_THRESHOLD
Definition mparam.h:41
static mp_limb_t lmmp_sqrt_1_(mp_ptr dsts, mp_limb_t x)
Definition sqrt.c:38
static void lmmp_sqrt_newton_(mp_ptr dsts, mp_srcptr numa, mp_size_t na, mp_size_t nf)
Definition sqrt.c:275

引用了 LIMB_BITS, lmmp_addmul_1_(), lmmp_copy, lmmp_debug_assert, lmmp_leading_zeros_(), lmmp_shl_(), lmmp_shr_(), lmmp_sqrt_1_(), lmmp_sqrt_divide_(), lmmp_sqrt_newton_(), lmmp_sub_1_(), lmmp_submul_1_(), lmmp_zero, SQRT_NEWTON_THRESHOLD, TALLOC_TYPE, TEMP_DECL , 以及 TEMP_FREE.

+ 函数调用图:

◆ lmmp_sqrt_1_()

static mp_limb_t lmmp_sqrt_1_ ( mp_ptr  dsts,
mp_limb_t  x 
)
static

在文件 sqrt.c38 行定义.

38 {
40 mp_limb_t v, xh = x >> 24, s, s2;
41 mp_slimb_t t;
42
43 // round(sqrt(2^25/(1/2+floor(x/2^55))))
44 v = 256 + lmmp_invsqrt_table_[(x >> 55) - 128];
45
46 t = (((mp_limb_t)1 << 48) - ((x >> 32) + 1) * v * v) * v;
47 v = (v << 16) + (t >> 33);
48
49 s = v * xh;
50 s2 = (s >> 28) + 1;
51 t = (xh << 32) - s2 * s2;
52 s = s + v * (t >> 33);
53
54 // we proved that -0.616 < s/2^32 - sqrt(x) < 0
55 // so (s>>32) will be either floor(sqrt(x)), or 1 too small
56 s >>= 32;
57 x -= s * s;
58
59 if (x >= 2 * s + 1) {
60 x -= 2 * s + 1;
61 ++s;
62 }
63
64 *dsts = s;
65 return x;
66}
int64_t mp_slimb_t
Definition lmmp.h:213
#define s2
static const mp_byte_t lmmp_invsqrt_table_[]
Definition sqrt.c:12

引用了 LIMB_B_4, lmmp_invsqrt_table_, lmmp_param_assert , 以及 s2.

被这些函数引用 lmmp_sqrt_() , 以及 lmmp_sqrt_2_().

+ 这是这个函数的调用关系图:

◆ lmmp_sqrt_2_()

static mp_limb_t lmmp_sqrt_2_ ( mp_ptr  dsts,
mp_ptr  dstr,
mp_srcptr  numa 
)
static

在文件 sqrt.c70 行定义.

70 {
71 lmmp_param_assert(numa[1] >= LIMB_B_4);
72 mp_limb_t rl, s, q, al, u;
73 mp_slimb_t rh;
74
75 rl = lmmp_sqrt_1_(&s, numa[1]);
76 al = numa[0];
77
78 //(r:alh)/2
79 rl = rl << 31 | al >> 33;
80 q = rl / s;
81 q -= q >> 32;
82
83 u = rl - s * q;
84 s = s << 32 | q;
85 rh = u >> 31;
86 rl = (u << 33) | (al & (((mp_limb_t)1 << 33) - 1));
87
88 q *= q;
89 rh -= rl < q;
90 rl -= q;
91 if (rh < 0) {
92 rl += s;
93 rh += rl < s;
94 --s;
95 rl += s;
96 rh += rl < s;
97 }
98
99 dsts[0] = s;
100 dstr[0] = rl;
101 return rh;
102}

引用了 LIMB_B_4, lmmp_param_assert , 以及 lmmp_sqrt_1_().

被这些函数引用 lmmp_sqrt_divide_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_sqrt_divide_()

static mp_limb_t lmmp_sqrt_divide_ ( mp_ptr  dsts,
mp_ptr  numa,
mp_size_t  ns,
int  nsh 
)
static

在文件 sqrt.c107 行定义.

107 {
108 lmmp_param_assert(ns > 0);
109 lmmp_param_assert(nsh >= 0 && nsh < LIMB_BITS);
110 lmmp_param_assert(numa[2 * ns - 1] >= LIMB_B_4);
111 mp_slimb_t rh;
112 if (ns == 1) {
113 rh = lmmp_sqrt_2_(dsts, numa, numa);
114 } else {
115 mp_size_t lo = ns / 2, hi = ns - lo;
116 mp_limb_t qh = lmmp_sqrt_divide_(dsts + lo, numa + 2 * lo, hi, 0);
117 if (qh)
118 lmmp_sub_n_(numa + 2 * lo, numa + 2 * lo, dsts + lo, hi);
119 qh += lmmp_div_s_(dsts, numa + lo, ns, dsts + lo, hi);
120 rh = lmmp_shr_c_(dsts, dsts, lo, 1, qh << (LIMB_BITS - 1));
121 // now dsts is either correct or 1 too big,
122 // if nsh-LSBs are non-zero, subtracting 1
123 // will not affect anything after de-normalization
124 if (dsts[0] & (((mp_limb_t)1 << nsh) - 1))
125 return 1;
126 if (rh)
127 rh = lmmp_add_n_(numa + lo, numa + lo, dsts + lo, hi);
128 qh >>= 1;
129 lmmp_sqr_(numa + ns, dsts, lo);
130 mp_limb_t b = qh + lmmp_sub_n_(numa, numa, numa + ns, lo * 2);
131 if (lo == hi)
132 rh -= b;
133 else
134 rh -= lmmp_sub_1_(numa + 2 * lo, numa + 2 * lo, 1, b);
135 if (rh < 0) {
136 qh = lmmp_add_1_(dsts + lo, dsts + lo, hi, qh);
137 rh += 2 * qh + lmmp_addshl1_n_(numa, numa, dsts, ns);
138 rh -= lmmp_sub_1_(numa, numa, ns, 1);
139 qh -= lmmp_sub_1_(dsts, dsts, ns, 1);
140 }
141 }
142 return rh;
143}
mp_limb_t lmmp_shr_c_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shr, mp_limb_t c)
带进位的大数右移操作 [dst,na] = [numa,na]>>shr,dst的高shr位填充c的高shr位
Definition shr.c:30
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
mp_limb_t lmmp_addshl1_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法结合左移1位操作 [dst,n] = [numa,n] + ([numb,n] << 1)
Definition shl.c:56
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
static mp_limb_t lmmp_sqrt_2_(mp_ptr dsts, mp_ptr dstr, mp_srcptr numa)
Definition sqrt.c:70

引用了 LIMB_B_4, LIMB_BITS, lmmp_add_1_(), lmmp_add_n_(), lmmp_addshl1_n_(), lmmp_div_s_(), lmmp_param_assert, lmmp_shr_c_(), lmmp_sqr_(), lmmp_sqrt_2_(), lmmp_sqrt_divide_(), lmmp_sub_1_() , 以及 lmmp_sub_n_().

被这些函数引用 lmmp_invsqrt_newton_(), lmmp_sqrt_() , 以及 lmmp_sqrt_divide_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_sqrt_newton_()

static void lmmp_sqrt_newton_ ( mp_ptr  dsts,
mp_srcptr  numa,
mp_size_t  na,
mp_size_t  nf 
)
static

在文件 sqrt.c275 行定义.

275 {
276 lmmp_param_assert(na > 0);
277 lmmp_param_assert(nf >= 2);
278 mp_limb_t high = numa[na - 1];
279 int nsh = lmmp_leading_zeros_(high) / 2;
280 mp_size_t ns = na / 2 + 1 + nf;
281
282 TEMP_DECL;
283 mp_limb_t alloc_size = (nsh ? na : 0) + ns + 1;
284 mp_ptr tp = TALLOC_TYPE(alloc_size, mp_limb_t), numa2;
285 if (nsh) {
286 numa2 = tp;
287 lmmp_shl_(numa2, numa, na, nsh * 2);
288 tp += na;
289 } else
290 numa2 = (mp_ptr)numa;
291
292 lmmp_invsqrt_newton_(tp, ns, numa2, na);
293
294 mp_ptr msqr = TALLOC_TYPE(na + ns + 1, mp_limb_t);
295
296 if (ns + 1 > na)
297 lmmp_mul_(msqr, tp, ns + 1, numa2, na);
298 else
299 lmmp_mul_(msqr, numa2, na, tp, ns + 1);
300
301 mp_limb_t cceil;
302 if (na & 1) {
303 nsh += LIMB_BITS / 2;
304 lmmp_shr_(dsts, msqr + na, ns, nsh);
305 cceil = msqr[na] >> (nsh - 1);
306 } else {
307 if (nsh) {
308 lmmp_shr_(dsts, msqr + na + 1, ns - 1, nsh);
309 cceil = msqr[na + 1] >> (nsh - 1);
310 } else {
311 lmmp_copy(dsts, msqr + na + 1, ns - 1);
312 cceil = msqr[na] >> (LIMB_BITS - 1);
313 }
314 dsts[ns - 1] = 0;
315 }
316
317 if (cceil & 1)
318 lmmp_inc(dsts);
319
320 TEMP_FREE;
321}
#define tp
static void lmmp_invsqrt_newton_(mp_ptr dstis, mp_size_t ns, mp_srcptr numa, mp_size_t na)
Definition sqrt.c:147

引用了 LIMB_BITS, lmmp_copy, lmmp_inc, lmmp_invsqrt_newton_(), lmmp_leading_zeros_(), lmmp_mul_(), lmmp_param_assert, lmmp_shl_(), lmmp_shr_(), TALLOC_TYPE, TEMP_DECL, TEMP_FREE , 以及 tp.

被这些函数引用 lmmp_sqrt_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

变量说明

◆ lmmp_invsqrt_table_

const mp_byte_t lmmp_invsqrt_table_[]
static
初始值:
= {
0xff, 0xfd, 0xfb, 0xf9, 0xf7, 0xf5, 0xf3, 0xf2, 0xf0, 0xee, 0xec, 0xea, 0xe9, 0xe7, 0xe5, 0xe4, 0xe2, 0xe0, 0xdf,
0xdd, 0xdb, 0xda, 0xd8, 0xd7, 0xd5, 0xd4, 0xd2, 0xd1, 0xcf, 0xce, 0xcc, 0xcb, 0xc9, 0xc8, 0xc6, 0xc5, 0xc4, 0xc2,
0xc1, 0xc0, 0xbe, 0xbd, 0xbc, 0xba, 0xb9, 0xb8, 0xb7, 0xb5, 0xb4, 0xb3, 0xb2, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xaa,
0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96,
0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84,
0x83, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x79, 0x78, 0x77, 0x76, 0x76, 0x75,
0x74, 0x73, 0x72, 0x72, 0x71, 0x70, 0x6f, 0x6f, 0x6e, 0x6d, 0x6d, 0x6c, 0x6b, 0x6a, 0x6a, 0x69, 0x68, 0x68, 0x67,
0x66, 0x66, 0x65, 0x64, 0x64, 0x63, 0x62, 0x62, 0x61, 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5d, 0x5c, 0x5c, 0x5b, 0x5a,
0x5a, 0x59, 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x55, 0x54, 0x54, 0x53, 0x53, 0x52, 0x52, 0x51, 0x50, 0x50, 0x4f,
0x4f, 0x4e, 0x4e, 0x4d, 0x4d, 0x4c, 0x4b, 0x4b, 0x4a, 0x4a, 0x49, 0x49, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46, 0x45,
0x45, 0x44, 0x44, 0x43, 0x43, 0x42, 0x42, 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3f, 0x3e, 0x3e, 0x3d, 0x3d, 0x3c, 0x3c,
0x3b, 0x3b, 0x3a, 0x3a, 0x39, 0x39, 0x39, 0x38, 0x38, 0x37, 0x37, 0x36, 0x36, 0x35, 0x35, 0x35, 0x34, 0x34, 0x33,
0x33, 0x32, 0x32, 0x32, 0x31, 0x31, 0x30, 0x30, 0x2f, 0x2f, 0x2f, 0x2e, 0x2e, 0x2d, 0x2d, 0x2d, 0x2c, 0x2c, 0x2b,
0x2b, 0x2b, 0x2a, 0x2a, 0x29, 0x29, 0x29, 0x28, 0x28, 0x27, 0x27, 0x27, 0x26, 0x26, 0x26, 0x25, 0x25, 0x24, 0x24,
0x24, 0x23, 0x23, 0x23, 0x22, 0x22, 0x21, 0x21, 0x21, 0x20, 0x20, 0x20, 0x1f, 0x1f, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d,
0x1d, 0x1d, 0x1c, 0x1c, 0x1b, 0x1b, 0x1b, 0x1a, 0x1a, 0x1a, 0x19, 0x19, 0x19, 0x18, 0x18, 0x18, 0x18, 0x17, 0x17,
0x17, 0x16, 0x16, 0x16, 0x15, 0x15, 0x15, 0x14, 0x14, 0x14, 0x13, 0x13, 0x13, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11,
0x11, 0x10, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0c, 0x0c, 0x0b,
0x0b, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x06, 0x06,
0x06, 0x06, 0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01,
0x01, 0x01, 0x00, 0x00}

在文件 sqrt.c12 行定义.

12 {
13 0xff, 0xfd, 0xfb, 0xf9, 0xf7, 0xf5, 0xf3, 0xf2, 0xf0, 0xee, 0xec, 0xea, 0xe9, 0xe7, 0xe5, 0xe4, 0xe2, 0xe0, 0xdf,
14 0xdd, 0xdb, 0xda, 0xd8, 0xd7, 0xd5, 0xd4, 0xd2, 0xd1, 0xcf, 0xce, 0xcc, 0xcb, 0xc9, 0xc8, 0xc6, 0xc5, 0xc4, 0xc2,
15 0xc1, 0xc0, 0xbe, 0xbd, 0xbc, 0xba, 0xb9, 0xb8, 0xb7, 0xb5, 0xb4, 0xb3, 0xb2, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xaa,
16 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96,
17 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84,
18 0x83, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x79, 0x78, 0x77, 0x76, 0x76, 0x75,
19 0x74, 0x73, 0x72, 0x72, 0x71, 0x70, 0x6f, 0x6f, 0x6e, 0x6d, 0x6d, 0x6c, 0x6b, 0x6a, 0x6a, 0x69, 0x68, 0x68, 0x67,
20 0x66, 0x66, 0x65, 0x64, 0x64, 0x63, 0x62, 0x62, 0x61, 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5d, 0x5c, 0x5c, 0x5b, 0x5a,
21 0x5a, 0x59, 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x55, 0x54, 0x54, 0x53, 0x53, 0x52, 0x52, 0x51, 0x50, 0x50, 0x4f,
22 0x4f, 0x4e, 0x4e, 0x4d, 0x4d, 0x4c, 0x4b, 0x4b, 0x4a, 0x4a, 0x49, 0x49, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46, 0x45,
23 0x45, 0x44, 0x44, 0x43, 0x43, 0x42, 0x42, 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3f, 0x3e, 0x3e, 0x3d, 0x3d, 0x3c, 0x3c,
24 0x3b, 0x3b, 0x3a, 0x3a, 0x39, 0x39, 0x39, 0x38, 0x38, 0x37, 0x37, 0x36, 0x36, 0x35, 0x35, 0x35, 0x34, 0x34, 0x33,
25 0x33, 0x32, 0x32, 0x32, 0x31, 0x31, 0x30, 0x30, 0x2f, 0x2f, 0x2f, 0x2e, 0x2e, 0x2d, 0x2d, 0x2d, 0x2c, 0x2c, 0x2b,
26 0x2b, 0x2b, 0x2a, 0x2a, 0x29, 0x29, 0x29, 0x28, 0x28, 0x27, 0x27, 0x27, 0x26, 0x26, 0x26, 0x25, 0x25, 0x24, 0x24,
27 0x24, 0x23, 0x23, 0x23, 0x22, 0x22, 0x21, 0x21, 0x21, 0x20, 0x20, 0x20, 0x1f, 0x1f, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d,
28 0x1d, 0x1d, 0x1c, 0x1c, 0x1b, 0x1b, 0x1b, 0x1a, 0x1a, 0x1a, 0x19, 0x19, 0x19, 0x18, 0x18, 0x18, 0x18, 0x17, 0x17,
29 0x17, 0x16, 0x16, 0x16, 0x15, 0x15, 0x15, 0x14, 0x14, 0x14, 0x13, 0x13, 0x13, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11,
30 0x11, 0x10, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0c, 0x0c, 0x0b,
31 0x0b, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x06, 0x06,
32 0x06, 0x06, 0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01,
33 0x01, 0x01, 0x00, 0x00};

被这些函数引用 lmmp_sqrt_1_().