|
LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
|
#include "../../../include/lammp/impl/signed.h"
#include "../../../include/lammp/impl/tmp_alloc.h"
#include "../../../include/lammp/lmmpn.h"
#include "../../../include/lammp/impl/mparam.h"
#include "../../../include/lammp/matrix.h"
mat22_mul.c 的引用(Include)关系图:
宏定义 | |
| #define | A11 (matA->a00) |
| #define | A11 (mat->a00) |
| #define | A11n (matA->n00) |
| #define | A11n (mat->n00) |
| #define | A12 (matA->a01) |
| #define | A12 (mat->a01) |
| #define | A12n (matA->n01) |
| #define | A12n (mat->n01) |
| #define | A21 (matA->a10) |
| #define | A21 (mat->a10) |
| #define | A21n (matA->n10) |
| #define | A21n (mat->n10) |
| #define | A22 (matA->a11) |
| #define | A22 (mat->a11) |
| #define | A22n (matA->n11) |
| #define | A22n (mat->n11) |
| #define | B11 (matB->a00) |
| #define | B11n (matB->n00) |
| #define | B12 (matB->a01) |
| #define | B12n (matB->n01) |
| #define | B21 (matB->a10) |
| #define | B21n (matB->n10) |
| #define | B22 (matB->a11) |
| #define | B22n (matB->n11) |
| #define | C11 (dst->a00) |
| #define | C11 (dst->a00) |
| #define | C11n (dst->n00) |
| #define | C11n (dst->n00) |
| #define | C12 (dst->a01) |
| #define | C12 (dst->a01) |
| #define | C12n (dst->n01) |
| #define | C12n (dst->n01) |
| #define | C21 (dst->a10) |
| #define | C21 (dst->a10) |
| #define | C21n (dst->n10) |
| #define | C21n (dst->n10) |
| #define | C22 (dst->a11) |
| #define | C22 (dst->a11) |
| #define | C22n (dst->n11) |
| #define | C22n (dst->n11) |
| #define | p1 tp |
| #define | p1 tp |
| #define | p1 (tp) |
| #define | p1 (tp) |
| #define | p1n n1 |
| #define | p1n n1 |
| #define | p2 tp + tn |
| #define | p2 tp + tn |
| #define | p2 (tp + tn) |
| #define | p2 (tp + tn) |
| #define | p2n n5 |
| #define | p2n n5 |
| #define | p3 (tp + 2 * tn) |
| #define | p3 (tp + 2 * tn) |
| #define | p3n n2 |
| #define | p3n n2 |
| #define | p4 (tp + 3 * tn) |
| #define | p4 (tp + 3 * tn) |
| #define | p4n n7 |
| #define | p4n n7 |
| #define | p5 (tp + 4 * tn) |
| #define | p5 (tp + 4 * tn) |
| #define | p5n n6 |
| #define | p5n n6 |
| #define | p6 (tp + 5 * tn) |
| #define | p6 (tp + 5 * tn) |
| #define | p6n n3 |
| #define | p6n n3 |
| #define | p7 (tp + 6 * tn) |
| #define | p7 (tp + 6 * tn) |
| #define | p7n n4 |
| #define | p7n n4 |
| #define | s1 (dst->a00) |
| #define | s1 (dst->a00) |
| #define | s2 (dst->a01) |
| #define | s2 (dst->a01) |
| #define | s3 (dst->a10) |
| #define | s3 (dst->a10) |
| #define | s4 (dst->a11) |
| #define | s4 (dst->a11) |
| #define | t1 (dst->a00 + maxa) |
| #define | t2 (dst->a01 + maxa) |
| #define | t3 (dst->a10 + maxa) |
| #define | t4 (dst->a11 + maxa) |
| #define | U1 p5 |
| #define | U1 p5 |
| #define | U1n p5n |
| #define | U1n p5n |
| #define | U2 p1 |
| #define | U2 p1 |
| #define | U2n p1n |
| #define | U2n p1n |
| #define | U3 U1 |
| #define | U3 U1 |
| #define | U3n n8 |
| #define | U3n n8 |
函数 | |
| void | lmmp_mat22_mul_basecase_ (lmmp_mat22_t *dst, const lmmp_mat22_t *matA, const lmmp_mat22_t *matB, mp_ptr tp, mp_size_t tn) |
| 计算2x2矩阵和2x2矩阵的乘积 | |
| int | lmmp_mat22_mul_size_ (lmmp_mat22_t *dst, const lmmp_mat22_t *matA, const lmmp_mat22_t *matB, mp_size_t *tn, mp_size_t *maxa) |
| 计算2x2矩阵和2x2矩阵的乘积需要分配的内存 | |
| void | lmmp_mat22_mul_strassen_ (lmmp_mat22_t *dst, const lmmp_mat22_t *matA, const lmmp_mat22_t *matB, mp_ptr tp, mp_size_t tn, mp_size_t maxa) |
| 计算(稠密)2x2矩阵和(稠密)2x2矩阵的乘积(STRASSEN算法) | |
| void | lmmp_mat22_sqr_basecase_ (lmmp_mat22_t *dst, const lmmp_mat22_t *matA, mp_ptr tp, mp_size_t tn) |
| 计算2x2矩阵平方 | |
| void | lmmp_mat22_sqr_strassen_ (lmmp_mat22_t *dst, const lmmp_mat22_t *mat, mp_ptr tp, mp_size_t tn) |
| 计算(稠密)2x2矩阵平方(STRASSEN算法) | |
| #define A11 (matA->a00) |
| #define A11 (mat->a00) |
| #define A11n (matA->n00) |
| #define A11n (mat->n00) |
| #define A12 (matA->a01) |
| #define A12 (mat->a01) |
| #define A12n (matA->n01) |
| #define A12n (mat->n01) |
| #define A21 (matA->a10) |
| #define A21 (mat->a10) |
| #define A21n (matA->n10) |
| #define A21n (mat->n10) |
| #define A22 (matA->a11) |
| #define A22 (mat->a11) |
| #define A22n (matA->n11) |
| #define A22n (mat->n11) |
| #define B11 (matB->a00) |
| #define B11n (matB->n00) |
| #define B12 (matB->a01) |
| #define B12n (matB->n01) |
| #define B21 (matB->a10) |
| #define B21n (matB->n10) |
| #define B22 (matB->a11) |
| #define B22n (matB->n11) |
| #define C11 (dst->a00) |
| #define C11 (dst->a00) |
| #define C11n (dst->n00) |
| #define C11n (dst->n00) |
| #define C12 (dst->a01) |
| #define C12 (dst->a01) |
| #define C12n (dst->n01) |
| #define C12n (dst->n01) |
| #define C21 (dst->a10) |
| #define C21 (dst->a10) |
| #define C21n (dst->n10) |
| #define C21n (dst->n10) |
| #define C22 (dst->a11) |
| #define C22 (dst->a11) |
| #define C22n (dst->n11) |
| #define C22n (dst->n11) |
| #define p1 tp |
| #define p1 tp |
| #define p1 (tp) |
| #define p1 (tp) |
| #define p1n n1 |
| #define p1n n1 |
| #define p2 tp + tn |
| #define p2 tp + tn |
| #define p2 (tp + tn) |
| #define p2 (tp + tn) |
| #define p2n n5 |
| #define p2n n5 |
| #define p3 (tp + 2 * tn) |
| #define p3 (tp + 2 * tn) |
| #define p3n n2 |
| #define p3n n2 |
| #define p4 (tp + 3 * tn) |
| #define p4 (tp + 3 * tn) |
| #define p4n n7 |
| #define p4n n7 |
| #define p5 (tp + 4 * tn) |
| #define p5 (tp + 4 * tn) |
| #define p5n n6 |
| #define p5n n6 |
| #define p6 (tp + 5 * tn) |
| #define p6 (tp + 5 * tn) |
| #define p6n n3 |
| #define p6n n3 |
| #define p7 (tp + 6 * tn) |
| #define p7 (tp + 6 * tn) |
| #define p7n n4 |
| #define p7n n4 |
| #define s1 (dst->a00) |
| #define s1 (dst->a00) |
| #define s2 (dst->a01) |
| #define s2 (dst->a01) |
| #define s3 (dst->a10) |
| #define s3 (dst->a10) |
| #define s4 (dst->a11) |
| #define s4 (dst->a11) |
| #define t1 (dst->a00 + maxa) |
| #define t2 (dst->a01 + maxa) |
| #define t3 (dst->a10 + maxa) |
| #define t4 (dst->a11 + maxa) |
| #define U1 p5 |
| #define U1 p5 |
| #define U1n p5n |
| #define U1n p5n |
| #define U2 p1 |
| #define U2 p1 |
| #define U2n p1n |
| #define U2n p1n |
| #define U3 U1 |
| #define U3 U1 |
| #define U3n n8 |
| #define U3n n8 |
| void lmmp_mat22_mul_basecase_ | ( | lmmp_mat22_t * | dst, |
| const lmmp_mat22_t * | matA, | ||
| const lmmp_mat22_t * | matB, | ||
| mp_ptr | tp, | ||
| mp_size_t | tn | ||
| ) |
计算2x2矩阵和2x2矩阵的乘积
| dst | 结果矩阵。 |
| matA | 矩阵A |
| matB | 矩阵B |
| tp | 临时缓冲区,用于存储中间结果,需要分配2*tn个limb,若为NULL,则会自动分配。 |
| tn | 缓冲区的limb长度 |
在文件 mat22_mul.c 第 83 行定义.
引用了 lmmp_mat22_t::a00, lmmp_mat22_t::a01, lmmp_mat22_t::a10, lmmp_mat22_t::a11, lmmp_add_signed_(), lmmp_mat22_sqr_basecase_(), lmmp_mul_signed_(), lmmp_param_assert, lmmp_mat22_t::n00, lmmp_mat22_t::n01, lmmp_mat22_t::n10, lmmp_mat22_t::n11, p1, p2, TALLOC_TYPE, TEMP_DECL, TEMP_FREE , 以及 tp.
被这些函数引用 lmmp_mat22_mul_().
函数调用图:
这是这个函数的调用关系图:
| int lmmp_mat22_mul_size_ | ( | lmmp_mat22_t * | dst, |
| const lmmp_mat22_t * | matA, | ||
| const lmmp_mat22_t * | matB, | ||
| mp_size_t * | tn, | ||
| mp_size_t * | maxa | ||
| ) |
计算2x2矩阵和2x2矩阵的乘积需要分配的内存
| dst | 结果矩阵,dst中的n将会被覆盖为对应位置需要的limb长度,此函数不分配内存。 |
| matA | 矩阵A |
| matB | 矩阵B |
| tn | 输出参数,将会被覆盖为缓冲区需要的limb长度,正数 |
| maxa | 输出参数;若被覆盖,其值为matA中最大元素的limb长度+1。仅当确定使用STRASSEN算法时才需要此参数 |
在文件 mat22_mul.c 第 13 行定义.
引用了 A11, B11, LMMP_ABS, LMMP_MAX, lmmp_param_assert, MAT22_MUL_STRASSEN_THRESHOLD, MAT22_SQR_STRASSEN_THRESHOLD, lmmp_mat22_t::n00, lmmp_mat22_t::n01, lmmp_mat22_t::n10 , 以及 lmmp_mat22_t::n11.
| void lmmp_mat22_mul_strassen_ | ( | lmmp_mat22_t * | dst, |
| const lmmp_mat22_t * | matA, | ||
| const lmmp_mat22_t * | matB, | ||
| mp_ptr | tp, | ||
| mp_size_t | tn, | ||
| mp_size_t | maxa | ||
| ) |
计算(稠密)2x2矩阵和(稠密)2x2矩阵的乘积(STRASSEN算法)
| dst | 结果矩阵。 |
| matA | 矩阵A |
| matB | 矩阵B |
| tp | 临时缓冲区,用于存储中间结果,需要分配7*(tn+1)个limb,若为NULL,则会自动分配。 |
| tn | 缓冲区的limb长度 |
| maxa | matA中最大的元素的limb长度+1,建议由lmmp_mat22_mul_size_确定 |
在文件 mat22_mul.c 第 194 行定义.
引用了 A11, A11n, A12, A12n, A21, A21n, A22, A22n, B11, B11n, B12, B12n, B21, B21n, B22, B22n, BALLOC_TYPE, C11, C11n, C12, C12n, C21, C21n, C22, C22n, lmmp_add_signed_(), lmmp_mat22_sqr_strassen_(), lmmp_mul_signed_(), lmmp_param_assert, p1, p1n, p2, p2n, p3, p3n, p4, p4n, p5, p5n, p6, p6n, p7, p7n, s1, s2, s3, s4, t1, t2, t3, t4, TEMP_B_DECL, TEMP_B_FREE, tp, U1, U1n, U2, U2n, U3 , 以及 U3n.
被这些函数引用 lmmp_mat22_mul_().
函数调用图:
这是这个函数的调用关系图:
| void lmmp_mat22_sqr_basecase_ | ( | lmmp_mat22_t * | dst, |
| const lmmp_mat22_t * | matA, | ||
| mp_ptr | tp, | ||
| mp_size_t | tn | ||
| ) |
计算2x2矩阵平方
| dst | 结果矩阵。 |
| matA | 矩阵A |
| tp | 临时缓冲区,用于存储中间结果,需要分配2*tn个limb,若为NULL,则会自动分配。 |
| tn | 缓冲区的limb长度 |
| maxa | matA中最大的元素的limb长度+1,建议由lmmp_mat22_mul_size_确定(注意:本函数的签名中没有 maxa 参数,此条说明疑为从 lmmp_mat22_mul_strassen_ 的文档误拷贝,待确认) |
在文件 mat22_mul.c 第 119 行定义.
引用了 lmmp_mat22_t::a00, lmmp_mat22_t::a01, lmmp_mat22_t::a10, lmmp_mat22_t::a11, lmmp_add_signed_(), lmmp_mul_signed_(), lmmp_sqr_signed_(), lmmp_mat22_t::n00, lmmp_mat22_t::n01, lmmp_mat22_t::n10, lmmp_mat22_t::n11, p1, p2, TALLOC_TYPE, TEMP_DECL, TEMP_FREE , 以及 tp.
被这些函数引用 lmmp_mat22_mul_basecase_() , 以及 lmmp_mat22_sqr_().
函数调用图:
这是这个函数的调用关系图:
| void lmmp_mat22_sqr_strassen_ | ( | lmmp_mat22_t * | dst, |
| const lmmp_mat22_t * | matA, | ||
| mp_ptr | tp, | ||
| mp_size_t | tn | ||
| ) |
计算(稠密)2x2矩阵平方(STRASSEN算法)
| dst | 结果矩阵。 |
| matA | 矩阵A |
| tp | 临时缓冲区,用于存储中间结果,需要分配7*(tn+1)个limb,若为NULL,则会自动分配。 |
| tn | 缓冲区的limb长度 |
在文件 mat22_mul.c 第 346 行定义.
引用了 A11, A11n, A12, A12n, A21, A21n, A22, A22n, BALLOC_TYPE, C11, C11n, C12, C12n, C21, C21n, C22, C22n, lmmp_add_signed_(), lmmp_mul_signed_(), lmmp_param_assert, lmmp_sqr_signed_(), p1, p1n, p2, p2n, p3, p3n, p4, p4n, p5, p5n, p6, p6n, p7, p7n, s1, s2, s3, s4, TEMP_B_DECL, TEMP_B_FREE, tp, U1, U1n, U2, U2n, U3 , 以及 U3n.
被这些函数引用 lmmp_mat22_mul_strassen_() , 以及 lmmp_mat22_sqr_().
函数调用图:
这是这个函数的调用关系图: