/src/mpdecimal-4.0.0/libmpdec/sixstep.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


#include <assert.h>
#include <stdio.h>

#include "bits.h"
#include "constants.h"
#include "difradix2.h"
#include "numbertheory.h"
#include "mpdecimal.h"
#include "sixstep.h"
#include "transpose.h"
#include "umodarith.h"


/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
   form 2**n (See literature/six-step.txt). */


/* forward transform with sign = -1 */
int
six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
{
    struct fnt_params *tparams;
    mpd_size_t log2n, C, R;
    mpd_uint_t kernel;
    mpd_uint_t umod;
#ifdef PPRO
    double dmod;
    uint32_t dinvmod[3];
#endif
    mpd_uint_t *x, w0, w1, wstep;
    mpd_size_t i, k;


    assert(ispower2(n));
    assert(n >= 16);
    assert(n <= MPD_MAXTRANSFORM_2N);

    log2n = mpd_bsr(n);
    C = ((mpd_size_t)1) << (log2n / 2);  /* number of columns */
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */


    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        return 0;
    }

    /* Length R transform on the rows. */
    if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
        return 0;
    }
    for (x = a; x < a+n; x += R) {
        fnt_dif2(x, R, tparams);
    }

    /* Transpose the matrix. */
    if (!transpose_pow2(a, C, R)) {
        mpd_free(tparams);
        return 0;
    }

    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, -1, modnum);
    for (i = 1; i < R; i++) {
        w0 = 1;                  /* r**(i*0): initial value for k=0 */
        w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
        wstep = MULMOD(w1, w1);  /* r**(2*i) */
        for (k = 0; k < C; k += 2) {
            mpd_uint_t x0 = a[i*C+k];
            mpd_uint_t x1 = a[i*C+k+1];
            MULMOD2(&x0, w0, &x1, w1);
            MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
            a[i*C+k] = x0;
            a[i*C+k+1] = x1;
        }
    }

    /* Length C transform on the rows. */
    if (C != R) {
        mpd_free(tparams);
        if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
            return 0;
        }
    }
    for (x = a; x < a+n; x += C) {
        fnt_dif2(x, C, tparams);
    }
    mpd_free(tparams);

#if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        return 0;
    }
#endif

    return 1;
}


/* reverse transform, sign = 1 */
int
inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
{
    struct fnt_params *tparams;
    mpd_size_t log2n, C, R;
    mpd_uint_t kernel;
    mpd_uint_t umod;
#ifdef PPRO
    double dmod;
    uint32_t dinvmod[3];
#endif
    mpd_uint_t *x, w0, w1, wstep;
    mpd_size_t i, k;


    assert(ispower2(n));
    assert(n >= 16);
    assert(n <= MPD_MAXTRANSFORM_2N);

    log2n = mpd_bsr(n);
    C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */


#if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix, producing an R*C matrix. */
    if (!transpose_pow2(a, C, R)) {
        return 0;
    }
#endif

    /* Length C transform on the rows. */
    if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
        return 0;
    }
    for (x = a; x < a+n; x += C) {
        fnt_dif2(x, C, tparams);
    }

    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, 1, modnum);
    for (i = 1; i < R; i++) {
        w0 = 1;
        w1 = POWMOD(kernel, i);
        wstep = MULMOD(w1, w1);
        for (k = 0; k < C; k += 2) {
            mpd_uint_t x0 = a[i*C+k];
            mpd_uint_t x1 = a[i*C+k+1];
            MULMOD2(&x0, w0, &x1, w1);
            MULMOD2C(&w0, &w1, wstep);
            a[i*C+k] = x0;
            a[i*C+k+1] = x1;
        }
    }

    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        mpd_free(tparams);
        return 0;
    }

    /* Length R transform on the rows. */
    if (R != C) {
        mpd_free(tparams);
        if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
            return 0;
        }
    }
    for (x = a; x < a+n; x += R) {
        fnt_dif2(x, R, tparams);
    }
    mpd_free(tparams);

    /* Transpose the matrix. */
    if (!transpose_pow2(a, C, R)) {
        return 0;
    }

    return 1;
}

Coverage Report

Created: 2024-11-21 07:03

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
3		*
4		* Redistribution and use in source and binary forms, with or without
5		* modification, are permitted provided that the following conditions
6		* are met:
7		*
8		* 1. Redistributions of source code must retain the above copyright
9		* notice, this list of conditions and the following disclaimer.
10		* 2. Redistributions in binary form must reproduce the above copyright
11		* notice, this list of conditions and the following disclaimer in the
12		* documentation and/or other materials provided with the distribution.
13		*
14		* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15		* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16		* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17		* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18		* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19		* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20		* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21		* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22		* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23		* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24		* SUCH DAMAGE.
25		*/
26
27
28		#include <assert.h>
29		#include <stdio.h>
30
31		#include "bits.h"
32		#include "constants.h"
33		#include "difradix2.h"
34		#include "numbertheory.h"
35		#include "mpdecimal.h"
36		#include "sixstep.h"
37		#include "transpose.h"
38		#include "umodarith.h"
39
40
41		/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
42		form 2*n (See literature/six-step.txt). /
43
44
45		/* forward transform with sign = -1 */
46		int
47		six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
48	0	{
49	0	struct fnt_params *tparams;
50	0	mpd_size_t log2n, C, R;
51	0	mpd_uint_t kernel;
52	0	mpd_uint_t umod;
53		#ifdef PPRO
54		double dmod;
55		uint32_t dinvmod[3];
56		#endif
57	0	mpd_uint_t *x, w0, w1, wstep;
58	0	mpd_size_t i, k;
59
60
61	0	assert(ispower2(n));
62	0	assert(n >= 16);
63	0	assert(n <= MPD_MAXTRANSFORM_2N);
64
65	0	log2n = mpd_bsr(n);
66	0	C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
67	0	R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
68
69
70		/* Transpose the matrix. */
71	0	if (!transpose_pow2(a, R, C)) {
72	0	return 0;
73	0	}
74
75		/* Length R transform on the rows. */
76	0	if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
77	0	return 0;
78	0	}
79	0	for (x = a; x < a+n; x += R) {
80	0	fnt_dif2(x, R, tparams);
81	0	}
82
83		/* Transpose the matrix. */
84	0	if (!transpose_pow2(a, C, R)) {
85	0	mpd_free(tparams);
86	0	return 0;
87	0	}
88
89		/* Multiply each matrix element (addressed by iC+k) by r(ik). */
90	0	SETMODULUS(modnum);
91	0	kernel = _mpd_getkernel(n, -1, modnum);
92	0	for (i = 1; i < R; i++) {
93	0	w0 = 1; /* r*(i0): initial value for k=0 */
94	0	w1 = POWMOD(kernel, i); /* r*(i1): initial value for k=1 */
95	0	wstep = MULMOD(w1, w1); /* r*(2i) */
96	0	for (k = 0; k < C; k += 2) {
97	0	mpd_uint_t x0 = a[i*C+k];
98	0	mpd_uint_t x1 = a[i*C+k+1];
99	0	MULMOD2(&x0, w0, &x1, w1);
100	0	MULMOD2C(&w0, &w1, wstep); /* r*(i(k+2)) = r*(ik) * r*(2i) */
101	0	a[i*C+k] = x0;
102	0	a[i*C+k+1] = x1;
103	0	}
104	0	}
105
106		/* Length C transform on the rows. */
107	0	if (C != R) {
108	0	mpd_free(tparams);
109	0	if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
110	0	return 0;
111	0	}
112	0	}
113	0	for (x = a; x < a+n; x += C) {
114	0	fnt_dif2(x, C, tparams);
115	0	}
116	0	mpd_free(tparams);
117
118		#if 0
119		/* An unordered transform is sufficient for convolution. */
120		/* Transpose the matrix. */
121		if (!transpose_pow2(a, R, C)) {
122		return 0;
123		}
124		#endif
125
126	0	return 1;
127	0	}
128
129
130		/* reverse transform, sign = 1 */
131		int
132		inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
133	0	{
134	0	struct fnt_params *tparams;
135	0	mpd_size_t log2n, C, R;
136	0	mpd_uint_t kernel;
137	0	mpd_uint_t umod;
138		#ifdef PPRO
139		double dmod;
140		uint32_t dinvmod[3];
141		#endif
142	0	mpd_uint_t *x, w0, w1, wstep;
143	0	mpd_size_t i, k;
144
145
146	0	assert(ispower2(n));
147	0	assert(n >= 16);
148	0	assert(n <= MPD_MAXTRANSFORM_2N);
149
150	0	log2n = mpd_bsr(n);
151	0	C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
152	0	R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
153
154
155		#if 0
156		/* An unordered transform is sufficient for convolution. */
157		/* Transpose the matrix, producing an RC matrix. /
158		if (!transpose_pow2(a, C, R)) {
159		return 0;
160		}
161		#endif
162
163		/* Length C transform on the rows. */
164	0	if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
165	0	return 0;
166	0	}
167	0	for (x = a; x < a+n; x += C) {
168	0	fnt_dif2(x, C, tparams);
169	0	}
170
171		/* Multiply each matrix element (addressed by iC+k) by r(ik). */
172	0	SETMODULUS(modnum);
173	0	kernel = _mpd_getkernel(n, 1, modnum);
174	0	for (i = 1; i < R; i++) {
175	0	w0 = 1;
176	0	w1 = POWMOD(kernel, i);
177	0	wstep = MULMOD(w1, w1);
178	0	for (k = 0; k < C; k += 2) {
179	0	mpd_uint_t x0 = a[i*C+k];
180	0	mpd_uint_t x1 = a[i*C+k+1];
181	0	MULMOD2(&x0, w0, &x1, w1);
182	0	MULMOD2C(&w0, &w1, wstep);
183	0	a[i*C+k] = x0;
184	0	a[i*C+k+1] = x1;
185	0	}
186	0	}
187
188		/* Transpose the matrix. */
189	0	if (!transpose_pow2(a, R, C)) {
190	0	mpd_free(tparams);
191	0	return 0;
192	0	}
193
194		/* Length R transform on the rows. */
195	0	if (R != C) {
196	0	mpd_free(tparams);
197	0	if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
198	0	return 0;
199	0	}
200	0	}
201	0	for (x = a; x < a+n; x += R) {
202	0	fnt_dif2(x, R, tparams);
203	0	}
204	0	mpd_free(tparams);
205
206		/* Transpose the matrix. */
207	0	if (!transpose_pow2(a, C, R)) {
208	0	return 0;
209	0	}
210
211	0	return 1;
212	0	}