Coverage Report

Created: 2024-11-21 07:03

/src/mpdecimal-4.0.0/libmpdec/fourstep.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
27
28
#include <assert.h>
29
30
#include "constants.h"
31
#include "fourstep.h"
32
#include "mpdecimal.h"
33
#include "numbertheory.h"
34
#include "sixstep.h"
35
#include "umodarith.h"
36
37
38
/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
39
   form 3 * 2**n (See literature/matrix-transform.txt). */
40
41
42
#ifndef PPRO
43
static inline void
44
std_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3,
45
              mpd_uint_t w3table[3], mpd_uint_t umod)
46
0
{
47
0
    mpd_uint_t r1, r2;
48
0
    mpd_uint_t w;
49
0
    mpd_uint_t s, tmp;
50
51
52
    /* k = 0 -> w = 1 */
53
0
    s = *x1;
54
0
    s = addmod(s, *x2, umod);
55
0
    s = addmod(s, *x3, umod);
56
57
0
    r1 = s;
58
59
    /* k = 1 */
60
0
    s = *x1;
61
62
0
    w = w3table[1];
63
0
    tmp = MULMOD(*x2, w);
64
0
    s = addmod(s, tmp, umod);
65
66
0
    w = w3table[2];
67
0
    tmp = MULMOD(*x3, w);
68
0
    s = addmod(s, tmp, umod);
69
70
0
    r2 = s;
71
72
    /* k = 2 */
73
0
    s = *x1;
74
75
0
    w = w3table[2];
76
0
    tmp = MULMOD(*x2, w);
77
0
    s = addmod(s, tmp, umod);
78
79
0
    w = w3table[1];
80
0
    tmp = MULMOD(*x3, w);
81
0
    s = addmod(s, tmp, umod);
82
83
0
    *x3 = s;
84
0
    *x2 = r2;
85
0
    *x1 = r1;
86
0
}
87
#else /* PPRO */
88
static inline void
89
ppro_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_uint_t w3table[3],
90
               mpd_uint_t umod, double *dmod, uint32_t dinvmod[3])
91
{
92
    mpd_uint_t r1, r2;
93
    mpd_uint_t w;
94
    mpd_uint_t s, tmp;
95
96
97
    /* k = 0 -> w = 1 */
98
    s = *x1;
99
    s = addmod(s, *x2, umod);
100
    s = addmod(s, *x3, umod);
101
102
    r1 = s;
103
104
    /* k = 1 */
105
    s = *x1;
106
107
    w = w3table[1];
108
    tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
109
    s = addmod(s, tmp, umod);
110
111
    w = w3table[2];
112
    tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
113
    s = addmod(s, tmp, umod);
114
115
    r2 = s;
116
117
    /* k = 2 */
118
    s = *x1;
119
120
    w = w3table[2];
121
    tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
122
    s = addmod(s, tmp, umod);
123
124
    w = w3table[1];
125
    tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
126
    s = addmod(s, tmp, umod);
127
128
    *x3 = s;
129
    *x2 = r2;
130
    *x1 = r1;
131
}
132
#endif
133
134
135
/* forward transform, sign = -1; transform length = 3 * 2**n */
136
int
137
four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
138
0
{
139
0
    mpd_size_t R = 3; /* number of rows */
140
0
    mpd_size_t C = n / 3; /* number of columns */
141
0
    mpd_uint_t w3table[3];
142
0
    mpd_uint_t kernel, w0, w1, wstep;
143
0
    mpd_uint_t *s, *p0, *p1, *p2;
144
0
    mpd_uint_t umod;
145
#ifdef PPRO
146
    double dmod;
147
    uint32_t dinvmod[3];
148
#endif
149
0
    mpd_size_t i, k;
150
151
152
0
    assert(n >= 48);
153
0
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
154
155
156
    /* Length R transform on the columns. */
157
0
    SETMODULUS(modnum);
158
0
    _mpd_init_w3table(w3table, -1, modnum);
159
0
    for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
160
161
0
        SIZE3_NTT(p0, p1, p2, w3table);
162
0
    }
163
164
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
165
0
    kernel = _mpd_getkernel(n, -1, modnum);
166
0
    for (i = 1; i < R; i++) {
167
0
        w0 = 1;                  /* r**(i*0): initial value for k=0 */
168
0
        w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
169
0
        wstep = MULMOD(w1, w1);  /* r**(2*i) */
170
0
        for (k = 0; k < C-1; k += 2) {
171
0
            mpd_uint_t x0 = a[i*C+k];
172
0
            mpd_uint_t x1 = a[i*C+k+1];
173
0
            MULMOD2(&x0, w0, &x1, w1);
174
0
            MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
175
0
            a[i*C+k] = x0;
176
0
            a[i*C+k+1] = x1;
177
0
        }
178
0
    }
179
180
    /* Length C transform on the rows. */
181
0
    for (s = a; s < a+n; s += C) {
182
0
        if (!six_step_fnt(s, C, modnum)) {
183
0
            return 0;
184
0
        }
185
0
    }
186
187
0
    return 1;
188
0
}
189
190
/* backward transform, sign = 1; transform length = 3 * 2**n */
191
int
192
inv_four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
193
0
{
194
0
    mpd_size_t R = 3; /* number of rows */
195
0
    mpd_size_t C = n / 3; /* number of columns */
196
0
    mpd_uint_t w3table[3];
197
0
    mpd_uint_t kernel, w0, w1, wstep;
198
0
    mpd_uint_t *s, *p0, *p1, *p2;
199
0
    mpd_uint_t umod;
200
#ifdef PPRO
201
    double dmod;
202
    uint32_t dinvmod[3];
203
#endif
204
0
    mpd_size_t i, k;
205
206
207
0
    assert(n >= 48);
208
0
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
209
210
    /* Length C transform on the rows. */
211
0
    for (s = a; s < a+n; s += C) {
212
0
        if (!inv_six_step_fnt(s, C, modnum)) {
213
0
            return 0;
214
0
        }
215
0
    }
216
217
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
218
0
    SETMODULUS(modnum);
219
0
    kernel = _mpd_getkernel(n, 1, modnum);
220
0
    for (i = 1; i < R; i++) {
221
0
        w0 = 1;
222
0
        w1 = POWMOD(kernel, i);
223
0
        wstep = MULMOD(w1, w1);
224
0
        for (k = 0; k < C; k += 2) {
225
0
            mpd_uint_t x0 = a[i*C+k];
226
0
            mpd_uint_t x1 = a[i*C+k+1];
227
0
            MULMOD2(&x0, w0, &x1, w1);
228
0
            MULMOD2C(&w0, &w1, wstep);
229
0
            a[i*C+k] = x0;
230
0
            a[i*C+k+1] = x1;
231
0
        }
232
0
    }
233
234
    /* Length R transform on the columns. */
235
0
    _mpd_init_w3table(w3table, 1, modnum);
236
0
    for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
237
238
0
        SIZE3_NTT(p0, p1, p2, w3table);
239
0
    }
240
241
0
    return 1;
242
0
}