LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom33.c
浏览该文件的文档.
/*
 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
 * This file is part of lammp, under the GNU LGPL v2 license.
 * See LICENSE in the project root for the full license text.
 */
6
7#include "../../include/lammp/lmmpn.h"
8#include "../../include/lammp/impl/toom_interp.h"
9
/*
Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
  |-a2-|--a1--|--a0--|
   |b2-|--b1--|--b0--|
   <-t-><--n--><--n-->

v0  =  a0         * b0          #   A(0)*B(0)
v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)     ah  <= 2  bh <= 2
vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)   |ah| <= 1  bh <= 1
v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)     ah  <= 6  bh <= 6
vinf=          a2 *         b2  # A(inf)*B(inf)
*/
24
25void lmmp_mul_toom33_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb) {
26 lmmp_param_assert(nb >= 26);
27 lmmp_param_assert(na >= nb);
28 lmmp_param_assert(4 * na <= 5 * nb);
30 mp_size_t n = (na + 2) / 3, s = na - 2 * n, t = nb - 2 * n;
31 int vm1_neg;
32 mp_limb_t cy, cy2, vinf0, am1h, bm1h;
33 mp_limb_t* restrict tp = SALLOC_TYPE(4 * n + 4, mp_limb_t);
34
35#define a0 numa
36#define a1 (numa + n)
37#define a2 (numa + 2 * n)
38#define b0 numb
39#define b1 (numb + n)
40#define b2 (numb + 2 * n)
41
42#define v0 dst //[dst,2*n]
43#define v1 (dst + 2 * n) //[dst+2*n,2*n+1]
44#define vinf (dst + 4 * n) //[dst+4*n,s+t]
45#define vm1 tp //[tp,2*n+1]
46#define v2 (tp + 2 * n + 2) //[tp+2*n+2,2*n+1]
47
48#define bm1 dst //[dst,n]
49#define am1 (dst + n) //[dst+n,n]
50#define ap1 tp //[tp,n+1]
51#define bp1 (tp + n + 1) //[tp+n+1,n+1]
52#define ap2 ap1 // same space
53#define bp2 bp1 // same space
54
55 // ap1, am1
56 cy = lmmp_add_(ap1, a0, n, a2, s);
57 if (cy == 0 && lmmp_cmp_(ap1, a1, n) < 0) {
58 cy = lmmp_add_n_sub_n_(ap1, am1, a1, ap1, n);
59 ap1[n] = cy >> 1;
60 am1h = 0;
61 vm1_neg = 1;
62 } else {
63 cy2 = lmmp_add_n_sub_n_(ap1, am1, ap1, a1, n);
64 ap1[n] = cy + (cy2 >> 1);
65 am1h = cy - (cy2 & 1);
66 vm1_neg = 0;
67 }
68
69 // bp1, bm1
70 cy = lmmp_add_(bp1, b0, n, b2, t);
71 if (cy == 0 && lmmp_cmp_(bp1, b1, n) < 0) {
72 cy = lmmp_add_n_sub_n_(bp1, bm1, b1, bp1, n);
73 bp1[n] = cy >> 1;
74 bm1h = 0;
75 vm1_neg ^= 1;
76 } else {
77 cy2 = lmmp_add_n_sub_n_(bp1, bm1, bp1, b1, n);
78 bp1[n] = cy + (cy2 >> 1);
79 bm1h = cy - (cy2 & 1);
80 }
81
82 // vinf
83 if (s > t)
84 lmmp_mul_(vinf, a2, s, b2, t);
85 else
86 lmmp_mul_n_(vinf, a2, b2, s);
87 vinf0 = vinf[0]; // overlap with v1
88 cy = vinf[1]; // overlap with v1
89
90 // v1
91 lmmp_mul_n_(v1, ap1, bp1, n + 1);
92 vinf[1] = cy; // restore, since v1[2*n+1]==0.
93
94 // ap2
95 cy = lmmp_addshl1_n_(ap2, a1, a2, s);
96 if (s != n)
97 cy = lmmp_add_1_(ap2 + s, a1 + s, n - s, cy);
98 cy = 2 * cy + lmmp_addshl1_n_(ap2, a0, ap2, n);
99 ap2[n] = cy;
100
101 // bp2
102 cy = lmmp_addshl1_n_(bp2, b1, b2, t);
103 if (t != n)
104 cy = lmmp_add_1_(bp2 + t, b1 + t, n - t, cy);
105 cy = 2 * cy + lmmp_addshl1_n_(bp2, b0, bp2, n);
106 bp2[n] = cy;
107
108 // v2
109 lmmp_mul_n_(v2, ap2, bp2, n + 1);
110
111 // vm1
112 lmmp_mul_n_(vm1, am1, bm1, n);
113 cy = 0;
114 if (am1h)
115 cy = bm1h + lmmp_add_n_(vm1 + n, vm1 + n, bm1, n);
116 if (bm1h)
117 cy += lmmp_add_n_(vm1 + n, vm1 + n, am1, n);
118 vm1[2 * n] = cy;
119
120 // v0
121 lmmp_mul_n_(v0, a0, b0, n);
122
123 lmmp_toom_interp5_(dst, v2, vm1, n, s + t, vm1_neg, vinf0);
125}
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
mp_limb_t lmmp_addshl1_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法结合左移1位操作 [dst,n] = [numa,n] + ([numb,n] << 1)
Definition shl.c:56
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
#define ap2
#define b0
#define v0
#define b1
#define am1
#define ap1
#define v2
#define bp1
#define vm1
#define bm1
#define bp2
#define a2
#define a0
void lmmp_mul_toom33_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
Definition mul_toom33.c:25
#define a1
#define b2
#define vinf
#define v1
#define tp
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105
void lmmp_toom_interp5_(mp_ptr dst, mp_ptr v2, mp_ptr vm1, mp_size_t n, mp_size_t spt, int vm1_neg, mp_limb_t vinf0)
Toom插值计算(5点插值),用于Toom-33和Toom-42乘法算法