LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
sqr_toom4.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/toom_interp.h"
8
9/*
10Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
11
12 <-s--><--n--><--n--><--n-->
13 |-a3-|--a2--|--a1--|--a0--|
14
15 v0 = a0 ^2 # A(0)^2
16 v1 = ( a0+ a1+ a2+ a3)^2 # A(1)^2 ah <= 3
17 vm1 = ( a0- a1+ a2- a3)^2 # A(-1)^2 |ah| <= 1
18 v2 = ( a0+2a1+4a2+8a3)^2 # A(2)^2 ah <= 14
19 vh = (8a0+4a1+2a2+ a3)^2 # A(1/2)^2 ah <= 14
20 vmh = (8a0-4a1+2a2- a3)^2 # A(-1/2)^2 -4<=ah<=9
21 vinf= a3 ^2 # A(inf)^2
22*/
23
24void lmmp_sqr_toom4_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na) {
25 lmmp_param_assert(na > 0);
26 lmmp_param_assert(dst != NULL);
27 lmmp_param_assert(numa != NULL);
28 mp_size_t n, s;
29 mp_limb_t cy;
30
31#define a0 numa
32#define a1 (numa + n)
33#define a2 (numa + 2 * n)
34#define a3 (numa + 3 * n)
35
36 n = (na + 3) >> 2;
38 mp_ptr restrict scratch = SALLOC_TYPE(8 * n + 8, mp_limb_t);
39
40 s = na - 3 * n;
41
42 lmmp_debug_assert(0 < s && s <= n);
43
44 /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
45 * following limb, so these must be computed in order, and we need a
46 * one limb gap to tp. */
47#define v0 dst /* 2n */
48#define v1 (dst + 2 * n) /* 2n+1 */
49#define vinf (dst + 6 * n) /* s+t */
50#define v2 scratch /* 2n+1 */
51#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
52#define vh (scratch + 4 * n + 2) /* 2n+1 */
53#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
54#define tp (scratch + 8 * n + 5)
55
56 /* No overlap with v1 */
57#define apx dst /* n+1 */
58#define amx (dst + 4 * n + 2) /* n+1 */
59
60 /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
61 lmmp_toom_eval_dgr3_pm2_(apx, amx, numa, n, s, tp);
62
63 lmmp_sqr_(v2, apx, n + 1); /* v2, 2n+1 limbs */
64 lmmp_sqr_(vm2, amx, n + 1); /* vm2, 2n+1 limbs */
65
66 /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
67 cy = lmmp_addshl1_n_(apx, a1, a0, n);
68 cy = 2 * cy + lmmp_addshl1_n_(apx, a2, apx, n);
69 if (s < n) {
70 mp_limb_t cy2;
71 cy2 = lmmp_addshl1_n_(apx, a3, apx, s);
72 apx[n] = 2 * cy + lmmp_shl_(apx + s, apx + s, n - s, 1);
73 lmmp_inc_1(apx + s, cy2);
74 } else
75 apx[n] = 2 * cy + lmmp_addshl1_n_(apx, a3, apx, n);
76
77 lmmp_debug_assert(apx[n] < 15);
78
79 lmmp_sqr_(vh, apx, n + 1); /* vh, 2n+1 limbs */
80
81 /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
82 lmmp_toom_eval_dgr3_pm1_(apx, amx, numa, n, s, tp);
83
84 lmmp_sqr_(v1, apx, n + 1); /* v1, 2n+1 limbs */
85 lmmp_sqr_(vm1, amx, n + 1); /* vm1, 2n+1 limbs */
86
87 lmmp_sqr_(v0, a0, n);
88 lmmp_sqr_(vinf, a3, s); /* vinf, 2s limbs */
89
90 lmmp_toom_interp7_(dst, n, (enum toom7_flags)0, vm2, vm1, v2, vh, 2 * s, tp);
92}
#define scratch
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
void lmmp_sqr_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
大数平方操作 [dst,2*na] = [numa,na]^2
Definition sqr.c:10
mp_limb_t lmmp_addshl1_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法结合左移1位操作 [dst,n] = [numa,n] + ([numb,n] << 1)
Definition shl.c:56
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
#define v0
#define a3
#define v2
#define vm1
#define apx
#define vh
#define a2
#define a0
#define tp
#define a1
void lmmp_sqr_toom4_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na)
Definition sqr_toom4.c:24
#define vinf
#define amx
#define v1
#define vm2
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105
toom7_flags
Definition toom_interp.h:27
void lmmp_toom_interp7_(mp_ptr dst, mp_size_t n, enum toom7_flags flags, mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, mp_size_t w6n, mp_ptr tp)
Toom插值计算(7点插值):用于Toom-44、Toom-53、Toom-62 乘法算法
int lmmp_toom_eval_dgr3_pm2_(mp_ptr xp2, mp_ptr xm2, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +2 和 x = -2 处求值 计算 P(+2) 和 P(-2),其中 P(x) 是一个3次多项式(4段系数)。
int lmmp_toom_eval_dgr3_pm1_(mp_ptr xp1, mp_ptr xm1, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +1 和 x = -1 处求值 计算 P(+1) 和 P(-1),其中 P(x) 是一个3次多项式(4段系数)。