/src/mpdecimal-4.0.0/libmpdec/convolute.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2008-2024 Stefan Krah. All rights reserved. |
3 | | * |
4 | | * Redistribution and use in source and binary forms, with or without |
5 | | * modification, are permitted provided that the following conditions |
6 | | * are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | | * SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | |
28 | | #include "bits.h" |
29 | | #include "constants.h" |
30 | | #include "convolute.h" |
31 | | #include "fnt.h" |
32 | | #include "fourstep.h" |
33 | | #include "mpdecimal.h" |
34 | | #include "numbertheory.h" |
35 | | #include "sixstep.h" |
36 | | #include "umodarith.h" |
37 | | |
38 | | |
39 | | /* |
40 | | * Bignum: Fast convolution using the Number Theoretic Transform. Used for |
41 | | * the multiplication of very large coefficients. |
42 | | */ |
43 | | |
44 | | |
45 | | /* Convolute the data in c1 and c2. Result is in c1. */ |
46 | | int |
47 | | fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) |
48 | 0 | { |
49 | 0 | int (*fnt)(mpd_uint_t *, mpd_size_t, int); |
50 | 0 | int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); |
51 | | #ifdef PPRO |
52 | | double dmod; |
53 | | uint32_t dinvmod[3]; |
54 | | #endif |
55 | 0 | mpd_uint_t n_inv, umod; |
56 | 0 | mpd_size_t i; |
57 | | |
58 | |
|
59 | 0 | SETMODULUS(modnum); |
60 | 0 | n_inv = POWMOD(n, (umod-2)); |
61 | |
|
62 | 0 | if (ispower2(n)) { |
63 | 0 | if (n > SIX_STEP_THRESHOLD) { |
64 | 0 | fnt = six_step_fnt; |
65 | 0 | inv_fnt = inv_six_step_fnt; |
66 | 0 | } |
67 | 0 | else { |
68 | 0 | fnt = std_fnt; |
69 | 0 | inv_fnt = std_inv_fnt; |
70 | 0 | } |
71 | 0 | } |
72 | 0 | else { |
73 | 0 | fnt = four_step_fnt; |
74 | 0 | inv_fnt = inv_four_step_fnt; |
75 | 0 | } |
76 | |
|
77 | 0 | if (!fnt(c1, n, modnum)) { |
78 | 0 | return 0; |
79 | 0 | } |
80 | 0 | if (!fnt(c2, n, modnum)) { |
81 | 0 | return 0; |
82 | 0 | } |
83 | 0 | for (i = 0; i < n-1; i += 2) { |
84 | 0 | mpd_uint_t x0 = c1[i]; |
85 | 0 | mpd_uint_t y0 = c2[i]; |
86 | 0 | mpd_uint_t x1 = c1[i+1]; |
87 | 0 | mpd_uint_t y1 = c2[i+1]; |
88 | 0 | MULMOD2(&x0, y0, &x1, y1); |
89 | 0 | c1[i] = x0; |
90 | 0 | c1[i+1] = x1; |
91 | 0 | } |
92 | |
|
93 | 0 | if (!inv_fnt(c1, n, modnum)) { |
94 | 0 | return 0; |
95 | 0 | } |
96 | 0 | for (i = 0; i < n-3; i += 4) { |
97 | 0 | mpd_uint_t x0 = c1[i]; |
98 | 0 | mpd_uint_t x1 = c1[i+1]; |
99 | 0 | mpd_uint_t x2 = c1[i+2]; |
100 | 0 | mpd_uint_t x3 = c1[i+3]; |
101 | 0 | MULMOD2C(&x0, &x1, n_inv); |
102 | 0 | MULMOD2C(&x2, &x3, n_inv); |
103 | 0 | c1[i] = x0; |
104 | 0 | c1[i+1] = x1; |
105 | 0 | c1[i+2] = x2; |
106 | 0 | c1[i+3] = x3; |
107 | 0 | } |
108 | |
|
109 | 0 | return 1; |
110 | 0 | } |
111 | | |
112 | | /* Autoconvolute the data in c1. Result is in c1. */ |
113 | | int |
114 | | fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) |
115 | 0 | { |
116 | 0 | int (*fnt)(mpd_uint_t *, mpd_size_t, int); |
117 | 0 | int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); |
118 | | #ifdef PPRO |
119 | | double dmod; |
120 | | uint32_t dinvmod[3]; |
121 | | #endif |
122 | 0 | mpd_uint_t n_inv, umod; |
123 | 0 | mpd_size_t i; |
124 | | |
125 | |
|
126 | 0 | SETMODULUS(modnum); |
127 | 0 | n_inv = POWMOD(n, (umod-2)); |
128 | |
|
129 | 0 | if (ispower2(n)) { |
130 | 0 | if (n > SIX_STEP_THRESHOLD) { |
131 | 0 | fnt = six_step_fnt; |
132 | 0 | inv_fnt = inv_six_step_fnt; |
133 | 0 | } |
134 | 0 | else { |
135 | 0 | fnt = std_fnt; |
136 | 0 | inv_fnt = std_inv_fnt; |
137 | 0 | } |
138 | 0 | } |
139 | 0 | else { |
140 | 0 | fnt = four_step_fnt; |
141 | 0 | inv_fnt = inv_four_step_fnt; |
142 | 0 | } |
143 | |
|
144 | 0 | if (!fnt(c1, n, modnum)) { |
145 | 0 | return 0; |
146 | 0 | } |
147 | 0 | for (i = 0; i < n-1; i += 2) { |
148 | 0 | mpd_uint_t x0 = c1[i]; |
149 | 0 | mpd_uint_t x1 = c1[i+1]; |
150 | 0 | MULMOD2(&x0, x0, &x1, x1); |
151 | 0 | c1[i] = x0; |
152 | 0 | c1[i+1] = x1; |
153 | 0 | } |
154 | |
|
155 | 0 | if (!inv_fnt(c1, n, modnum)) { |
156 | 0 | return 0; |
157 | 0 | } |
158 | 0 | for (i = 0; i < n-3; i += 4) { |
159 | 0 | mpd_uint_t x0 = c1[i]; |
160 | 0 | mpd_uint_t x1 = c1[i+1]; |
161 | 0 | mpd_uint_t x2 = c1[i+2]; |
162 | 0 | mpd_uint_t x3 = c1[i+3]; |
163 | 0 | MULMOD2C(&x0, &x1, n_inv); |
164 | 0 | MULMOD2C(&x2, &x3, n_inv); |
165 | 0 | c1[i] = x0; |
166 | 0 | c1[i+1] = x1; |
167 | 0 | c1[i+2] = x2; |
168 | 0 | c1[i+3] = x3; |
169 | 0 | } |
170 | |
|
171 | 0 | return 1; |
172 | 0 | } |