/src/mpdecimal-4.0.0/libmpdec/difradix2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2008-2024 Stefan Krah. All rights reserved. |
3 | | * |
4 | | * Redistribution and use in source and binary forms, with or without |
5 | | * modification, are permitted provided that the following conditions |
6 | | * are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | | * SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | |
28 | | #include <assert.h> |
29 | | |
30 | | #include "bits.h" |
31 | | #include "constants.h" |
32 | | #include "difradix2.h" |
33 | | #include "mpdecimal.h" |
34 | | #include "numbertheory.h" |
35 | | #include "umodarith.h" |
36 | | |
37 | | |
38 | | /* Bignum: The actual transform routine (decimation in frequency). */ |
39 | | |
40 | | |
41 | | /* |
42 | | * Generate index pairs (x, bitreverse(x)) and carry out the permutation. |
43 | | * n must be a power of two. |
44 | | * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational", |
45 | | * Chapter 1.14.4. [http://www.jjj.de/fxt/] |
46 | | */ |
47 | | static inline void |
48 | | bitreverse_permute(mpd_uint_t a[], mpd_size_t n) |
49 | 0 | { |
50 | 0 | mpd_size_t x = 0; |
51 | 0 | mpd_size_t r = 0; |
52 | 0 | mpd_uint_t t; |
53 | |
|
54 | 0 | do { /* Invariant: r = bitreverse(x) */ |
55 | 0 | if (r > x) { |
56 | 0 | t = a[x]; |
57 | 0 | a[x] = a[r]; |
58 | 0 | a[r] = t; |
59 | 0 | } |
60 | | /* Flip trailing consecutive 1 bits and the first zero bit |
61 | | * that absorbs a possible carry. */ |
62 | 0 | x += 1; |
63 | | /* Mirror the operation on r: Flip n_trailing_zeros(x)+1 |
64 | | high bits of r. */ |
65 | 0 | r ^= (n - (n >> (mpd_bsf(x)+1))); |
66 | | /* The loop invariant is preserved. */ |
67 | 0 | } while (x < n); |
68 | 0 | } |
69 | | |
70 | | |
71 | | /* Fast Number Theoretic Transform, decimation in frequency. */ |
72 | | void |
73 | | fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams) |
74 | 0 | { |
75 | 0 | mpd_uint_t *wtable = tparams->wtable; |
76 | 0 | mpd_uint_t umod; |
77 | | #ifdef PPRO |
78 | | double dmod; |
79 | | uint32_t dinvmod[3]; |
80 | | #endif |
81 | 0 | mpd_uint_t u0, u1, v0, v1; |
82 | 0 | mpd_uint_t w, w0, w1, wstep; |
83 | 0 | mpd_size_t m, mhalf; |
84 | 0 | mpd_size_t j, r; |
85 | | |
86 | |
|
87 | 0 | assert(ispower2(n)); |
88 | 0 | assert(n >= 4); |
89 | |
|
90 | 0 | SETMODULUS(tparams->modnum); |
91 | | |
92 | | /* m == n */ |
93 | 0 | mhalf = n / 2; |
94 | 0 | for (j = 0; j < mhalf; j += 2) { |
95 | |
|
96 | 0 | w0 = wtable[j]; |
97 | 0 | w1 = wtable[j+1]; |
98 | |
|
99 | 0 | u0 = a[j]; |
100 | 0 | v0 = a[j+mhalf]; |
101 | |
|
102 | 0 | u1 = a[j+1]; |
103 | 0 | v1 = a[j+1+mhalf]; |
104 | |
|
105 | 0 | a[j] = addmod(u0, v0, umod); |
106 | 0 | v0 = submod(u0, v0, umod); |
107 | |
|
108 | 0 | a[j+1] = addmod(u1, v1, umod); |
109 | 0 | v1 = submod(u1, v1, umod); |
110 | |
|
111 | 0 | MULMOD2(&v0, w0, &v1, w1); |
112 | |
|
113 | 0 | a[j+mhalf] = v0; |
114 | 0 | a[j+1+mhalf] = v1; |
115 | |
|
116 | 0 | } |
117 | |
|
118 | 0 | wstep = 2; |
119 | 0 | for (m = n/2; m >= 2; m>>=1, wstep<<=1) { |
120 | |
|
121 | 0 | mhalf = m / 2; |
122 | | |
123 | | /* j == 0 */ |
124 | 0 | for (r = 0; r < n; r += 2*m) { |
125 | |
|
126 | 0 | u0 = a[r]; |
127 | 0 | v0 = a[r+mhalf]; |
128 | |
|
129 | 0 | u1 = a[m+r]; |
130 | 0 | v1 = a[m+r+mhalf]; |
131 | |
|
132 | 0 | a[r] = addmod(u0, v0, umod); |
133 | 0 | v0 = submod(u0, v0, umod); |
134 | |
|
135 | 0 | a[m+r] = addmod(u1, v1, umod); |
136 | 0 | v1 = submod(u1, v1, umod); |
137 | |
|
138 | 0 | a[r+mhalf] = v0; |
139 | 0 | a[m+r+mhalf] = v1; |
140 | 0 | } |
141 | |
|
142 | 0 | for (j = 1; j < mhalf; j++) { |
143 | |
|
144 | 0 | w = wtable[j*wstep]; |
145 | |
|
146 | 0 | for (r = 0; r < n; r += 2*m) { |
147 | |
|
148 | 0 | u0 = a[r+j]; |
149 | 0 | v0 = a[r+j+mhalf]; |
150 | |
|
151 | 0 | u1 = a[m+r+j]; |
152 | 0 | v1 = a[m+r+j+mhalf]; |
153 | |
|
154 | 0 | a[r+j] = addmod(u0, v0, umod); |
155 | 0 | v0 = submod(u0, v0, umod); |
156 | |
|
157 | 0 | a[m+r+j] = addmod(u1, v1, umod); |
158 | 0 | v1 = submod(u1, v1, umod); |
159 | |
|
160 | 0 | MULMOD2C(&v0, &v1, w); |
161 | |
|
162 | 0 | a[r+j+mhalf] = v0; |
163 | 0 | a[m+r+j+mhalf] = v1; |
164 | 0 | } |
165 | |
|
166 | 0 | } |
167 | |
|
168 | 0 | } |
169 | |
|
170 | 0 | bitreverse_permute(a, n); |
171 | 0 | } |