Coverage Report

Created: 2024-11-21 07:03

/src/mpdecimal-4.0.0/libmpdec/difradix2.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
27
28
#include <assert.h>
29
30
#include "bits.h"
31
#include "constants.h"
32
#include "difradix2.h"
33
#include "mpdecimal.h"
34
#include "numbertheory.h"
35
#include "umodarith.h"
36
37
38
/* Bignum: The actual transform routine (decimation in frequency). */
39
40
41
/*
42
 * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
43
 * n must be a power of two.
44
 * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
45
 * Chapter 1.14.4. [http://www.jjj.de/fxt/]
46
 */
47
static inline void
48
bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
49
0
{
50
0
    mpd_size_t x = 0;
51
0
    mpd_size_t r = 0;
52
0
    mpd_uint_t t;
53
54
0
    do { /* Invariant: r = bitreverse(x) */
55
0
        if (r > x) {
56
0
            t = a[x];
57
0
            a[x] = a[r];
58
0
            a[r] = t;
59
0
        }
60
        /* Flip trailing consecutive 1 bits and the first zero bit
61
         * that absorbs a possible carry. */
62
0
        x += 1;
63
        /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
64
           high bits of r. */
65
0
        r ^= (n - (n >> (mpd_bsf(x)+1)));
66
        /* The loop invariant is preserved. */
67
0
    } while (x < n);
68
0
}
69
70
71
/* Fast Number Theoretic Transform, decimation in frequency. */
72
void
73
fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
74
0
{
75
0
    mpd_uint_t *wtable = tparams->wtable;
76
0
    mpd_uint_t umod;
77
#ifdef PPRO
78
    double dmod;
79
    uint32_t dinvmod[3];
80
#endif
81
0
    mpd_uint_t u0, u1, v0, v1;
82
0
    mpd_uint_t w, w0, w1, wstep;
83
0
    mpd_size_t m, mhalf;
84
0
    mpd_size_t j, r;
85
86
87
0
    assert(ispower2(n));
88
0
    assert(n >= 4);
89
90
0
    SETMODULUS(tparams->modnum);
91
92
    /* m == n */
93
0
    mhalf = n / 2;
94
0
    for (j = 0; j < mhalf; j += 2) {
95
96
0
        w0 = wtable[j];
97
0
        w1 = wtable[j+1];
98
99
0
        u0 = a[j];
100
0
        v0 = a[j+mhalf];
101
102
0
        u1 = a[j+1];
103
0
        v1 = a[j+1+mhalf];
104
105
0
        a[j] = addmod(u0, v0, umod);
106
0
        v0 = submod(u0, v0, umod);
107
108
0
        a[j+1] = addmod(u1, v1, umod);
109
0
        v1 = submod(u1, v1, umod);
110
111
0
        MULMOD2(&v0, w0, &v1, w1);
112
113
0
        a[j+mhalf] = v0;
114
0
        a[j+1+mhalf] = v1;
115
116
0
    }
117
118
0
    wstep = 2;
119
0
    for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
120
121
0
        mhalf = m / 2;
122
123
        /* j == 0 */
124
0
        for (r = 0; r < n; r += 2*m) {
125
126
0
            u0 = a[r];
127
0
            v0 = a[r+mhalf];
128
129
0
            u1 = a[m+r];
130
0
            v1 = a[m+r+mhalf];
131
132
0
            a[r] = addmod(u0, v0, umod);
133
0
            v0 = submod(u0, v0, umod);
134
135
0
            a[m+r] = addmod(u1, v1, umod);
136
0
            v1 = submod(u1, v1, umod);
137
138
0
            a[r+mhalf] = v0;
139
0
            a[m+r+mhalf] = v1;
140
0
        }
141
142
0
        for (j = 1; j < mhalf; j++) {
143
144
0
            w = wtable[j*wstep];
145
146
0
            for (r = 0; r < n; r += 2*m) {
147
148
0
                u0 = a[r+j];
149
0
                v0 = a[r+j+mhalf];
150
151
0
                u1 = a[m+r+j];
152
0
                v1 = a[m+r+j+mhalf];
153
154
0
                a[r+j] = addmod(u0, v0, umod);
155
0
                v0 = submod(u0, v0, umod);
156
157
0
                a[m+r+j] = addmod(u1, v1, umod);
158
0
                v1 = submod(u1, v1, umod);
159
160
0
                MULMOD2C(&v0, &v1, w);
161
162
0
                a[r+j+mhalf] = v0;
163
0
                a[m+r+j+mhalf] = v1;
164
0
            }
165
166
0
        }
167
168
0
    }
169
170
0
    bitreverse_permute(a, n);
171
0
}