Coverage Report

Created: 2024-11-21 07:03

/src/mpdecimal-4.0.0/libmpdec/sixstep.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
27
28
#include <assert.h>
29
#include <stdio.h>
30
31
#include "bits.h"
32
#include "constants.h"
33
#include "difradix2.h"
34
#include "numbertheory.h"
35
#include "mpdecimal.h"
36
#include "sixstep.h"
37
#include "transpose.h"
38
#include "umodarith.h"
39
40
41
/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
42
   form 2**n (See literature/six-step.txt). */
43
44
45
/* forward transform with sign = -1 */
46
int
47
six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
48
0
{
49
0
    struct fnt_params *tparams;
50
0
    mpd_size_t log2n, C, R;
51
0
    mpd_uint_t kernel;
52
0
    mpd_uint_t umod;
53
#ifdef PPRO
54
    double dmod;
55
    uint32_t dinvmod[3];
56
#endif
57
0
    mpd_uint_t *x, w0, w1, wstep;
58
0
    mpd_size_t i, k;
59
60
61
0
    assert(ispower2(n));
62
0
    assert(n >= 16);
63
0
    assert(n <= MPD_MAXTRANSFORM_2N);
64
65
0
    log2n = mpd_bsr(n);
66
0
    C = ((mpd_size_t)1) << (log2n / 2);  /* number of columns */
67
0
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
68
69
70
    /* Transpose the matrix. */
71
0
    if (!transpose_pow2(a, R, C)) {
72
0
        return 0;
73
0
    }
74
75
    /* Length R transform on the rows. */
76
0
    if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
77
0
        return 0;
78
0
    }
79
0
    for (x = a; x < a+n; x += R) {
80
0
        fnt_dif2(x, R, tparams);
81
0
    }
82
83
    /* Transpose the matrix. */
84
0
    if (!transpose_pow2(a, C, R)) {
85
0
        mpd_free(tparams);
86
0
        return 0;
87
0
    }
88
89
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
90
0
    SETMODULUS(modnum);
91
0
    kernel = _mpd_getkernel(n, -1, modnum);
92
0
    for (i = 1; i < R; i++) {
93
0
        w0 = 1;                  /* r**(i*0): initial value for k=0 */
94
0
        w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
95
0
        wstep = MULMOD(w1, w1);  /* r**(2*i) */
96
0
        for (k = 0; k < C; k += 2) {
97
0
            mpd_uint_t x0 = a[i*C+k];
98
0
            mpd_uint_t x1 = a[i*C+k+1];
99
0
            MULMOD2(&x0, w0, &x1, w1);
100
0
            MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
101
0
            a[i*C+k] = x0;
102
0
            a[i*C+k+1] = x1;
103
0
        }
104
0
    }
105
106
    /* Length C transform on the rows. */
107
0
    if (C != R) {
108
0
        mpd_free(tparams);
109
0
        if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
110
0
            return 0;
111
0
        }
112
0
    }
113
0
    for (x = a; x < a+n; x += C) {
114
0
        fnt_dif2(x, C, tparams);
115
0
    }
116
0
    mpd_free(tparams);
117
118
#if 0
119
    /* An unordered transform is sufficient for convolution. */
120
    /* Transpose the matrix. */
121
    if (!transpose_pow2(a, R, C)) {
122
        return 0;
123
    }
124
#endif
125
126
0
    return 1;
127
0
}
128
129
130
/* reverse transform, sign = 1 */
131
int
132
inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
133
0
{
134
0
    struct fnt_params *tparams;
135
0
    mpd_size_t log2n, C, R;
136
0
    mpd_uint_t kernel;
137
0
    mpd_uint_t umod;
138
#ifdef PPRO
139
    double dmod;
140
    uint32_t dinvmod[3];
141
#endif
142
0
    mpd_uint_t *x, w0, w1, wstep;
143
0
    mpd_size_t i, k;
144
145
146
0
    assert(ispower2(n));
147
0
    assert(n >= 16);
148
0
    assert(n <= MPD_MAXTRANSFORM_2N);
149
150
0
    log2n = mpd_bsr(n);
151
0
    C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
152
0
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
153
154
155
#if 0
156
    /* An unordered transform is sufficient for convolution. */
157
    /* Transpose the matrix, producing an R*C matrix. */
158
    if (!transpose_pow2(a, C, R)) {
159
        return 0;
160
    }
161
#endif
162
163
    /* Length C transform on the rows. */
164
0
    if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
165
0
        return 0;
166
0
    }
167
0
    for (x = a; x < a+n; x += C) {
168
0
        fnt_dif2(x, C, tparams);
169
0
    }
170
171
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
172
0
    SETMODULUS(modnum);
173
0
    kernel = _mpd_getkernel(n, 1, modnum);
174
0
    for (i = 1; i < R; i++) {
175
0
        w0 = 1;
176
0
        w1 = POWMOD(kernel, i);
177
0
        wstep = MULMOD(w1, w1);
178
0
        for (k = 0; k < C; k += 2) {
179
0
            mpd_uint_t x0 = a[i*C+k];
180
0
            mpd_uint_t x1 = a[i*C+k+1];
181
0
            MULMOD2(&x0, w0, &x1, w1);
182
0
            MULMOD2C(&w0, &w1, wstep);
183
0
            a[i*C+k] = x0;
184
0
            a[i*C+k+1] = x1;
185
0
        }
186
0
    }
187
188
    /* Transpose the matrix. */
189
0
    if (!transpose_pow2(a, R, C)) {
190
0
        mpd_free(tparams);
191
0
        return 0;
192
0
    }
193
194
    /* Length R transform on the rows. */
195
0
    if (R != C) {
196
0
        mpd_free(tparams);
197
0
        if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
198
0
            return 0;
199
0
        }
200
0
    }
201
0
    for (x = a; x < a+n; x += R) {
202
0
        fnt_dif2(x, R, tparams);
203
0
    }
204
0
    mpd_free(tparams);
205
206
    /* Transpose the matrix. */
207
0
    if (!transpose_pow2(a, C, R)) {
208
0
        return 0;
209
0
    }
210
211
0
    return 1;
212
0
}