
本文展示开源 C 库 bfdev 的大数运算模块。我们将分别在 x86 Linux 平台和 py32f030 为代表的嵌入式平台上分别进行演示,并进行性能测试。
关于 bfdev 库,这是一个开源的 C 语言算法库, 它具有:良好的可移植性,面向对象的方法设计、安装部署简单等等优势。
MPI 即 Multi precision integer (多精度整数),就是对很大的数进行一系列的运算。在数学中,数的大小是没有上限的,但是在计算机中,由于受 ALU 字长的限制,处理器无法对其进行直接计算,我们就需要用到多精度整数算法。
我们先给出两个平台基于 MPI 使用 machin 公式 计算圆周率的测试代码,再进行说明。
#define MODULE_NAME "mpi-machin" #define bfdev_log_fmt(fmt) MODULE_NAME ": " fmt #include <stdio.h> #include <bfdev/mpi.h> #include <bfdev/log.h> #include "../time.h" #include "helper.h" #define TEST_LEN 10000 #define TEST_SIZE (TEST_LEN / 4 + 1) #define TEST_LOOP (TEST_LEN / 1.39793 + 1) #define PRINT_RESULT 1 int main(int argc, const char *argv[]) { bfdev_mpi_t *vw, *vs, *vv, *vq; unsigned int k; int retval; if (!((vw = bfdev_mpi_create(NULL)) && (vs = bfdev_mpi_create(NULL)) && (vv = bfdev_mpi_create(NULL)) && (vq = bfdev_mpi_create(NULL)))) return 1; /** * Machin-like formula: * PI = 16arctan(1/5) - 4arctan(1/239) * * These formulas are used in conjunction with Gregory's * series, the Taylor series expansion for arctangent: * arctan(x) = x - (x^3)/3 + (x^5)/5 - (x^7)/7 + ... */ if ((retval = bfdev_mpi_seti(vw, 16 * 5)) || (retval = bfdev_mpi_seti(vv, 4 * 239)) || (retval = bfdev_mpi_seti(vq, 10000))) return retval; for (k = 0; k < TEST_SIZE; ++k) { if ((retval = bfdev_mpi_mul(vw, vw, vq)) || (retval = bfdev_mpi_mul(vv, vv, vq))) return retval; } bfdev_log_info("Convergence Machin %d:\n", TEST_LEN); EXAMPLE_TIME_STATISTICAL( for (k = 1; k <= TEST_LOOP; ++k) { if ((retval = bfdev_mpi_divi(vw, vw, vw, 5 * 5)) || (retval = bfdev_mpi_divi(vv, vv, vv, 239 * 239)) || (retval = bfdev_mpi_sub(vq, vw, vv)) || (retval = bfdev_mpi_divi(vq, vq, vq, 2 * k - 1))) return retval; if (k & 1) retval = bfdev_mpi_add(vs, vs, vq); else retval = bfdev_mpi_sub(vs, vs, vq); if (retval) return retval; } 0; ); #if PRINT_RESULT char *result; result = print_num(vs, 10); if (!result) return 1; printf("%c.", *result); puts(result + 1); free(result); #endif bfdev_mpi_destory(vw); bfdev_mpi_destory(vs); bfdev_mpi_destory(vv); bfdev_mpi_destory(vq); return 0; } 以下是运算 pi 后 10000 位的结果,共使用了 0.36 秒。
./build/examples/mpi/mpi-machin [info] mpi-machin: Convergence Machin 10000: [info] mpi-machin: real time: 0.360000 [info] mpi-machin: user time: 0.350000 [info] mpi-machin: kern time: 0.000000 3.14159 ... #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <errno.h> #include <bfdev.h> #include "main.h" #include "py32f0xx_hal.h" #define TEST_LEN 100 #define TEST_SIZE (TEST_LEN / 4 + 1) #define TEST_LOOP (TEST_LEN / 1.39793 + 1) int mpi_benchmark(void) { uint32_t start, time; bfdev_mpi_t *vw, *vs, *vv, *vq; unsigned int k; int retval; if (!((vw = bfdev_mpi_create(NULL)) && (vs = bfdev_mpi_create(NULL)) && (vv = bfdev_mpi_create(NULL)) && (vq = bfdev_mpi_create(NULL)))) return 1; if ((retval = bfdev_mpi_seti(vw, 16 * 5)) || (retval = bfdev_mpi_seti(vv, 4 * 239)) || (retval = bfdev_mpi_seti(vq, 10000))) return retval; for (k = 0; k < TEST_SIZE; ++k) { if ((retval = bfdev_mpi_mul(vw, vw, vq)) || (retval = bfdev_mpi_mul(vv, vv, vq))) return retval; } bfdev_log_info("Convergence Machin %d:\n", TEST_LEN); start = HAL_GetTick(); for (k = 1; k <= TEST_LOOP; ++k) { if ((retval = bfdev_mpi_divi(vw, vw, vw, 5 * 5)) || (retval = bfdev_mpi_divi(vv, vv, vv, 239 * 239)) || (retval = bfdev_mpi_sub(vq, vw, vv)) || (retval = bfdev_mpi_divi(vq, vq, vq, 2 * k - 1))) return retval; if (k & 1) retval = bfdev_mpi_add(vs, vs, vq); else retval = bfdev_mpi_sub(vs, vs, vq); if (retval) return retval; iwdg_touch(); } bfdev_mpi_destory(vw); bfdev_mpi_destory(vs); bfdev_mpi_destory(vv); bfdev_mpi_destory(vq); iwdg_touch(); time = HAL_GetTick() - start; bfdev_log_notice("Total time: %lu.%lus\n", time / 1000, time % 1000); return 0; } 以下是运算结果,由于 py32f030 只有 4KiB RAM ,我们这里只计算 100 位。
[info] benchmark: Benchmark for PY32F0xx. [info] benchmark: Bfdev version: 1.0.1-devel [info] benchmark: This may take a few minutes... [info] Convergence Machin 100: [notice] Total time: 0.21s ... 我们计算的大体流程如下:
bfdev 中,使用 MPI 需要引入 <bfdev/mpi.h> 头文件,或者引入 <bfdev.h> 顶层头文件也可以。
这里要重点介绍一下 bfdev_mpi_create(alloc) 函数,它使用了 bfdev 的 allocator 兼容层,传入 NULL 将使用系统默认内存分配器,用户也可以使用自己的内存池为 MPI 分配内存。
欢迎各位感兴趣的读者去访问 bfdev 仓库。