LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom43.c 文件参考
+ mul_toom43.c 的引用(Include)关系图:

浏览源代码.

宏定义

#define a0   numa
 
#define a0a2   scratch
 
#define a1   (numa + n)
 
#define a1a3   asm1
 
#define a2   (numa + 2 * n)
 
#define a3   (numa + 3 * n)
 
#define as1   (dst + 4 * n + 4) /* n+1 */
 
#define as2   (dst + 3 * n + 3) /* n+1 */
 
#define asm1   (scratch + 3 * n + 3) /* n+1 */
 
#define asm2   (scratch + 4 * n + 4) /* n+1 */
 
#define b0   numb
 
#define b0b2   scratch
 
#define b1   (numb + n)
 
#define b1d   bsm1
 
#define b2   (numb + 2 * n)
 
#define bs1   dst /* n+1 */
 
#define bs2   (dst + 2 * n + 2) /* n+1 */
 
#define bsm1   (scratch + 2 * n + 2) /* n+1 */
 
#define bsm2   (dst + n + 1) /* n+1 */
 
#define v0   dst /* 2n */
 
#define v1   (dst + 2 * n) /* 2n+1 */
 
#define v2   (scratch + 4 * n + 2) /* 2n+1 */
 
#define vinf   (dst + 5 * n) /* s+t */
 
#define vm1   (scratch) /* 2n+1 */
 
#define vm2   (scratch + 2 * n + 1) /* 2n+1 */
 

函数

void lmmp_mul_toom43_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
 

宏定义说明

◆ a0

#define a0   numa

◆ a0a2

#define a0a2   scratch

◆ a1

#define a1   (numa + n)

◆ a1a3

#define a1a3   asm1

◆ a2

#define a2   (numa + 2 * n)

◆ a3

#define a3   (numa + 3 * n)

◆ as1

#define as1   (dst + 4 * n + 4) /* n+1 */

◆ as2

#define as2   (dst + 3 * n + 3) /* n+1 */

◆ asm1

#define asm1   (scratch + 3 * n + 3) /* n+1 */

◆ asm2

#define asm2   (scratch + 4 * n + 4) /* n+1 */

◆ b0

#define b0   numb

◆ b0b2

#define b0b2   scratch

◆ b1

#define b1   (numb + n)

◆ b1d

#define b1d   bsm1

◆ b2

#define b2   (numb + 2 * n)

◆ bs1

#define bs1   dst /* n+1 */

◆ bs2

#define bs2   (dst + 2 * n + 2) /* n+1 */

◆ bsm1

#define bsm1   (scratch + 2 * n + 2) /* n+1 */

◆ bsm2

#define bsm2   (dst + n + 1) /* n+1 */

◆ v0

#define v0   dst /* 2n */

◆ v1

#define v1   (dst + 2 * n) /* 2n+1 */

◆ v2

#define v2   (scratch + 4 * n + 2) /* 2n+1 */

◆ vinf

#define vinf   (dst + 5 * n) /* s+t */

◆ vm1

#define vm1   (scratch) /* 2n+1 */

◆ vm2

#define vm2   (scratch + 2 * n + 1) /* 2n+1 */

函数说明

◆ lmmp_mul_toom43_()

void lmmp_mul_toom43_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_size_t  na,
mp_srcptr restrict  numb,
mp_size_t  nb 
)

在文件 mul_toom43.c 第 25 行定义.

25 {
26 lmmp_param_assert(4 * na >= 5 * nb);
27 lmmp_param_assert(3 * na <= 5 * nb);
28 mp_size_t n, s, t;
29 enum toom6_flags flags;
30 mp_limb_t cy;
31
32#define a0 numa
33#define a1 (numa + n)
34#define a2 (numa + 2 * n)
35#define a3 (numa + 3 * n)
36#define b0 numb
37#define b1 (numb + n)
38#define b2 (numb + 2 * n)
39
40 n = 1 + (3 * na >= 4 * nb ? (na - 1) >> 2 : (nb - 1) / (mp_size_t)3);
42 mp_limb_t* restrict scratch = SALLOC_TYPE(6 * n + 6, mp_limb_t);
43
44 s = na - 3 * n;
45 t = nb - 2 * n;
46
47 lmmp_debug_assert(0 < s && s <= n);
48 lmmp_debug_assert(0 < t && t <= n);
49
50 /* This is true whenever na >= 25 or nb >= 19, I think. It
51 guarantees that we can fit 5 values of size n+1 in the product
52 area. */
53 lmmp_debug_assert(s + t >= 5);
54
55#define v0 dst /* 2n */
56#define vm1 (scratch) /* 2n+1 */
57#define v1 (dst + 2 * n) /* 2n+1 */
58#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
59#define v2 (scratch + 4 * n + 2) /* 2n+1 */
60#define vinf (dst + 5 * n) /* s+t */
61#define bs1 dst /* n+1 */
62#define bsm1 (scratch + 2 * n + 2) /* n+1 */
63#define asm1 (scratch + 3 * n + 3) /* n+1 */
64#define asm2 (scratch + 4 * n + 4) /* n+1 */
65#define bsm2 (dst + n + 1) /* n+1 */
66#define bs2 (dst + 2 * n + 2) /* n+1 */
67#define as2 (dst + 3 * n + 3) /* n+1 */
68#define as1 (dst + 4 * n + 4) /* n+1 */
69
70 /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
71 limb, because products will overwrite 2n+2 limbs. */
72
73#define a0a2 scratch
74#define b0b2 scratch
75#define a1a3 asm1
76#define b1d bsm1
77
78 /* Compute as2 and asm2. */
79 flags = (enum toom6_flags)(toom6_vm2_neg & lmmp_toom_eval_dgr3_pm2_(as2, asm2, numa, n, s, a1a3));
80
81 /* Compute bs2 and bsm2. */
82 b1d[n] = lmmp_shl_(b1d, b1, n, 1); /* 2b1 */
83 cy = lmmp_shl_(b0b2, b2, t, 2); /* 4b2 */
84 cy += lmmp_add_n_(b0b2, b0b2, b0, t); /* 4b2 + b0 */
85 if (t != n)
86 cy = lmmp_add_1_(b0b2 + t, b0 + t, n - t, cy);
87 b0b2[n] = cy;
88
89 if (lmmp_cmp_(b0b2, b1d, n + 1) < 0) {
91 flags = (enum toom6_flags)(flags ^ toom6_vm2_neg);
92 } else {
94 }
95
96
97 /* Compute as1 and asm1. */
98 flags = (enum toom6_flags)(flags ^ (toom6_vm1_neg & lmmp_toom_eval_dgr3_pm1_(as1, asm1, numa, n, s, a0a2)));
99
100 /* Compute bs1 and bsm1. */
101 bsm1[n] = lmmp_add_(bsm1, b0, n, b2, t);
102 if (bsm1[n] == 0 && lmmp_cmp_(bsm1, b1, n) < 0) {
103 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, bsm1, n);
104 bs1[n] = cy >> 1;
105 flags = (enum toom6_flags)(flags ^ toom6_vm1_neg);
106 } else {
107 cy = lmmp_add_n_sub_n_(bs1, bsm1, bsm1, b1, n);
108 bs1[n] = bsm1[n] + (cy >> 1);
109 bsm1[n] -= cy & 1;
110 }
111
112 lmmp_debug_assert(as1[n] <= 3);
113 lmmp_debug_assert(bs1[n] <= 2);
114 lmmp_debug_assert(asm1[n] <= 1);
115 lmmp_debug_assert(bsm1[n] <= 1);
116 lmmp_debug_assert(as2[n] <= 14);
117 lmmp_debug_assert(bs2[n] <= 6);
118 lmmp_debug_assert(asm2[n] <= 9);
119 lmmp_debug_assert(bsm2[n] <= 4);
120
121 /* vm1, 2n+1 limbs */
122 lmmp_mul_n_(vm1, asm1, bsm1, n + 1); /* W4 */
123
124 /* vm2, 2n+1 limbs */
125 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* W2 */
126
127 /* v2, 2n+1 limbs */
128 lmmp_mul_n_(v2, as2, bs2, n + 1); /* W1 */
129
130 /* v1, 2n+1 limbs */
131 lmmp_mul_n_(v1, as1, bs1, n + 1); /* W3 */
132
133 /* vinf, s+t limbs */ /* W0 */
134 if (s > t)
135 lmmp_mul_(vinf, a3, s, b2, t);
136 else
137 lmmp_mul_(vinf, b2, t, a3, s);
138
139 /* v0, 2n limbs */
140 lmmp_mul_n_(v0, numa, numb, n); /* W5 */
141
142 lmmp_toom_interp6_(dst, n, flags, vm1, vm2, v2, t + s);
143
145#undef v0
146#undef vm1
147#undef v1
148#undef vm2
149#undef v2
150#undef vinf
151#undef bs1
152#undef bs2
153#undef bsm1
154#undef bsm2
155#undef asm1
156#undef asm2
157#undef as1
158#undef as2
159#undef a0a2
160#undef b0b2
161#undef a1a3
162#undef b1d
163#undef a0
164#undef a1
165#undef a2
166#undef a3
167#undef b0
168#undef b1
169#undef b2
170}
#define scratch
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
#define bs1
#define bs2
#define b0
#define v0
#define a3
#define b1
#define bsm1
#define as2
#define v2
#define vm1
#define b0b2
#define asm2
#define bsm2
#define a0a2
#define asm1
#define b2
#define vinf
#define as1
#define v1
#define a1a3
#define vm2
#define b1d
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105
void lmmp_toom_interp6_(mp_ptr dst, mp_size_t n, enum toom6_flags flags, mp_ptr w4, mp_ptr w2, mp_ptr w1, mp_size_t w0n)
Toom插值计算(6点插值):用于Toom-43和Toom-52 乘法算法
toom6_flags
Definition toom_interp.h:25
@ toom6_vm2_neg
Definition toom_interp.h:25
@ toom6_vm1_neg
Definition toom_interp.h:25
int lmmp_toom_eval_dgr3_pm2_(mp_ptr xp2, mp_ptr xm2, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +2 和 x = -2 处求值。计算 P(+2) 和 P(-2),其中 P(x) 是一个3次多项式(4段系数)。
int lmmp_toom_eval_dgr3_pm1_(mp_ptr xp1, mp_ptr xm1, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +1 和 x = -1 处求值。计算 P(+1) 和 P(-1),其中 P(x) 是一个3次多项式(4段系数)。

引用了 a0a2, a1a3, a3, as1, as2, asm1, asm2, b0, b0b2, b1, b1d, b2, bs1, bs2, bsm1, bsm2, lmmp_add_(), lmmp_add_1_(), lmmp_add_n_(), lmmp_add_n_sub_n_(), lmmp_cmp_(), lmmp_debug_assert, lmmp_mul_(), lmmp_mul_n_(), lmmp_param_assert, lmmp_shl_(), lmmp_toom_eval_dgr3_pm1_(), lmmp_toom_eval_dgr3_pm2_(), lmmp_toom_interp6_(), SALLOC_TYPE, scratch, TEMP_S_DECL, TEMP_S_FREE, toom6_vm1_neg, toom6_vm2_neg, v0, v1, v2, vinf, vm1, 以及 vm2.

+ 函数调用图: