Coverage Report

Created: 2025-10-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/rdft/scalar/r2cf/hf_10.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Fri Oct 10 06:58:47 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include rdft/scalar/hf.h */
29
30
/*
31
 * This function contains 102 FP additions, 72 FP multiplications,
32
 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
33
 * 47 stack variables, 4 constants, and 40 memory accesses
34
 */
35
#include "rdft/scalar/hf.h"
36
37
/*
 * hf_10 (FMA build): size-10 codelet produced by genfft's gen_hc2hc with
 * "-fma -n 10 -dit" (see the "Generated by" comment above).
 *
 * For every m in [mb, me) the loop body
 *   - reads cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..9;
 *   - leaves element 0 untouched and combines each element k = 1..9 with the
 *     coefficient pair (W[2k-2], W[2k-1]) as
 *         re = W[2k-2] * cr_k + W[2k-1] * ci_k
 *         im = W[2k-2] * ci_k - W[2k-1] * cr_k
 *   - mixes the ten resulting pairs and stores 20 results back into the same
 *     cr/ci slots (the transform is in place).
 *
 * Parameters:
 *   cr, ci  arrays that are both read and written; after each iteration cr is
 *           advanced by +ms and ci by -ms.
 *   W       coefficient table; 18 values are consumed per iteration and the
 *           pointer is pre-offset by (mb - 1) * 18 before the first one.
 *   rs      stride handed to WS() to locate the ten elements.
 *   mb, me  half-open range of the loop counter m.
 *   ms      per-iteration step applied to cr and ci.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE are declared in FFTW headers that are not visible here.
 * The formulas above assume FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c -- confirm against the headers.
 */
static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
41
     DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
42
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
43
     {
44
    INT m;
45
    for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
46
         E T8, T23, T12, T1U, TM, TZ, T10, T1F, T1G, T25, T16, T17, T18, T1s, T1x;
47
         E T1P, Tl, Ty, Tz, T1I, T1J, T24, T13, T14, T15, T1h, T1m, T1O;
48
         { /* element 0 as-is, element 5 combined with (W[8], W[9]); sums and differences of the two */
49
        E T1, T1R, T3, T6, T4, T1S, T2, T7, T1T, T5;
50
        T1 = cr[0];
51
        T1R = ci[0];
52
        T3 = cr[WS(rs, 5)];
53
        T6 = ci[WS(rs, 5)];
54
        T2 = W[8];
55
        T4 = T2 * T3;
56
        T1S = T2 * T6;
57
        T5 = W[9];
58
        T7 = FMA(T5, T6, T4);
59
        T1T = FNMS(T5, T3, T1S);
60
        T8 = T1 - T7;
61
        T23 = T1T + T1R;
62
        T12 = T1 + T7;
63
        T1U = T1R - T1T;
64
         }
65
         { /* elements 4, 1, 9 and 6, each combined with its coefficient pair, then paired up */
66
        E TF, T1w, TY, T1p, TL, T1u, TS, T1r;
67
        {
68
       E TB, TE, TC, T1v, TA, TD;
69
       TB = cr[WS(rs, 4)];
70
       TE = ci[WS(rs, 4)];
71
       TA = W[6];
72
       TC = TA * TB;
73
       T1v = TA * TE;
74
       TD = W[7];
75
       TF = FMA(TD, TE, TC);
76
       T1w = FNMS(TD, TB, T1v);
77
        }
78
        {
79
       E TU, TX, TV, T1o, TT, TW;
80
       TU = cr[WS(rs, 1)];
81
       TX = ci[WS(rs, 1)];
82
       TT = W[0];
83
       TV = TT * TU;
84
       T1o = TT * TX;
85
       TW = W[1];
86
       TY = FMA(TW, TX, TV);
87
       T1p = FNMS(TW, TU, T1o);
88
        }
89
        {
90
       E TH, TK, TI, T1t, TG, TJ;
91
       TH = cr[WS(rs, 9)];
92
       TK = ci[WS(rs, 9)];
93
       TG = W[16];
94
       TI = TG * TH;
95
       T1t = TG * TK;
96
       TJ = W[17];
97
       TL = FMA(TJ, TK, TI);
98
       T1u = FNMS(TJ, TH, T1t);
99
        }
100
        {
101
       E TO, TR, TP, T1q, TN, TQ;
102
       TO = cr[WS(rs, 6)];
103
       TR = ci[WS(rs, 6)];
104
       TN = W[10];
105
       TP = TN * TO;
106
       T1q = TN * TR;
107
       TQ = W[11];
108
       TS = FMA(TQ, TR, TP);
109
       T1r = FNMS(TQ, TO, T1q);
110
        }
111
        TM = TF - TL;
112
        TZ = TS - TY;
113
        T10 = TM + TZ;
114
        T1F = T1w + T1u;
115
        T1G = T1r + T1p;
116
        T25 = T1F + T1G;
117
        T16 = TF + TL;
118
        T17 = TS + TY;
119
        T18 = T16 + T17;
120
        T1s = T1p - T1r;
121
        T1x = T1u - T1w;
122
        T1P = T1x + T1s;
123
         }
124
         { /* elements 2, 3, 7 and 8, each combined with its coefficient pair, then paired up */
125
        E Te, T1l, Tx, T1e, Tk, T1j, Tr, T1g;
126
        {
127
       E Ta, Td, Tb, T1k, T9, Tc;
128
       Ta = cr[WS(rs, 2)];
129
       Td = ci[WS(rs, 2)];
130
       T9 = W[2];
131
       Tb = T9 * Ta;
132
       T1k = T9 * Td;
133
       Tc = W[3];
134
       Te = FMA(Tc, Td, Tb);
135
       T1l = FNMS(Tc, Ta, T1k);
136
        }
137
        {
138
       E Tt, Tw, Tu, T1d, Ts, Tv;
139
       Tt = cr[WS(rs, 3)];
140
       Tw = ci[WS(rs, 3)];
141
       Ts = W[4];
142
       Tu = Ts * Tt;
143
       T1d = Ts * Tw;
144
       Tv = W[5];
145
       Tx = FMA(Tv, Tw, Tu);
146
       T1e = FNMS(Tv, Tt, T1d);
147
        }
148
        {
149
       E Tg, Tj, Th, T1i, Tf, Ti;
150
       Tg = cr[WS(rs, 7)];
151
       Tj = ci[WS(rs, 7)];
152
       Tf = W[12];
153
       Th = Tf * Tg;
154
       T1i = Tf * Tj;
155
       Ti = W[13];
156
       Tk = FMA(Ti, Tj, Th);
157
       T1j = FNMS(Ti, Tg, T1i);
158
        }
159
        {
160
       E Tn, Tq, To, T1f, Tm, Tp;
161
       Tn = cr[WS(rs, 8)];
162
       Tq = ci[WS(rs, 8)];
163
       Tm = W[14];
164
       To = Tm * Tn;
165
       T1f = Tm * Tq;
166
       Tp = W[15];
167
       Tr = FMA(Tp, Tq, To);
168
       T1g = FNMS(Tp, Tn, T1f);
169
        }
170
        Tl = Te - Tk;
171
        Ty = Tr - Tx;
172
        Tz = Tl + Ty;
173
        T1I = T1l + T1j;
174
        T1J = T1g + T1e;
175
        T24 = T1I + T1J;
176
        T13 = Te + Tk;
177
        T14 = Tr + Tx;
178
        T15 = T13 + T14;
179
        T1h = T1e - T1g;
180
        T1m = T1j - T1l;
181
        T1O = T1m + T1h;
182
         }
183
         { /* stores ci[4], ci[2], cr[3], ci[0], cr[1] */
184
        E T1b, T11, T1a, T1z, T1B, T1n, T1y, T1A, T1c;
185
        T1b = Tz - T10;
186
        T11 = Tz + T10;
187
        T1a = FNMS(KP250000000, T11, T8);
188
        T1n = T1h - T1m;
189
        T1y = T1s - T1x;
190
        T1z = FMA(KP618033988, T1y, T1n);
191
        T1B = FNMS(KP618033988, T1n, T1y);
192
        ci[WS(rs, 4)] = T8 + T11;
193
        T1A = FNMS(KP559016994, T1b, T1a);
194
        ci[WS(rs, 2)] = FNMS(KP951056516, T1B, T1A);
195
        cr[WS(rs, 3)] = FMA(KP951056516, T1B, T1A);
196
        T1c = FMA(KP559016994, T1b, T1a);
197
        ci[0] = FNMS(KP951056516, T1z, T1c);
198
        cr[WS(rs, 1)] = FMA(KP951056516, T1z, T1c);
199
         }
200
         { /* stores cr[0], cr[4], ci[3], cr[2], ci[1] */
201
        E T1D, T19, T1C, T1L, T1N, T1H, T1K, T1M, T1E;
202
        T1D = T15 - T18;
203
        T19 = T15 + T18;
204
        T1C = FNMS(KP250000000, T19, T12);
205
        T1H = T1F - T1G;
206
        T1K = T1I - T1J;
207
        T1L = FNMS(KP618033988, T1K, T1H);
208
        T1N = FMA(KP618033988, T1H, T1K);
209
        cr[0] = T12 + T19;
210
        T1M = FMA(KP559016994, T1D, T1C);
211
        cr[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M);
212
        ci[WS(rs, 3)] = FMA(KP951056516, T1N, T1M);
213
        T1E = FNMS(KP559016994, T1D, T1C);
214
        cr[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E);
215
        ci[WS(rs, 1)] = FMA(KP951056516, T1L, T1E);
216
         }
217
         { /* stores cr[5], cr[9], ci[8], cr[7], ci[6] */
218
        E T1W, T1Q, T1V, T20, T22, T1Y, T1Z, T21, T1X;
219
        T1W = T1P - T1O;
220
        T1Q = T1O + T1P;
221
        T1V = FMA(KP250000000, T1Q, T1U);
222
        T1Y = TZ - TM;
223
        T1Z = Ty - Tl;
224
        T20 = FNMS(KP618033988, T1Z, T1Y);
225
        T22 = FMA(KP618033988, T1Y, T1Z);
226
        cr[WS(rs, 5)] = T1Q - T1U;
227
        T21 = FMA(KP559016994, T1W, T1V);
228
        cr[WS(rs, 9)] = FMS(KP951056516, T22, T21);
229
        ci[WS(rs, 8)] = FMA(KP951056516, T22, T21);
230
        T1X = FNMS(KP559016994, T1W, T1V);
231
        cr[WS(rs, 7)] = FMS(KP951056516, T20, T1X);
232
        ci[WS(rs, 6)] = FMA(KP951056516, T20, T1X);
233
         }
234
         { /* stores ci[9], cr[8], ci[7], cr[6], ci[5] */
235
        E T28, T26, T27, T2c, T2e, T2a, T2b, T2d, T29;
236
        T28 = T24 - T25;
237
        T26 = T24 + T25;
238
        T27 = FNMS(KP250000000, T26, T23);
239
        T2a = T13 - T14;
240
        T2b = T16 - T17;
241
        T2c = FMA(KP618033988, T2b, T2a);
242
        T2e = FNMS(KP618033988, T2a, T2b);
243
        ci[WS(rs, 9)] = T26 + T23;
244
        T2d = FNMS(KP559016994, T28, T27);
245
        cr[WS(rs, 8)] = FMS(KP951056516, T2e, T2d);
246
        ci[WS(rs, 7)] = FMA(KP951056516, T2e, T2d);
247
        T29 = FMA(KP559016994, T28, T27);
248
        cr[WS(rs, 6)] = FMS(KP951056516, T2c, T29);
249
        ci[WS(rs, 5)] = FMA(KP951056516, T2c, T29);
250
         }
251
    }
252
     }
253
}
254
255
/*
 * Two-entry tw_instr table that the codelet descriptor below points at.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in FFTW headers not
 * visible here; the 10 in the first entry presumably matches the codelet size
 * and TW_NEXT presumably terminates the list -- confirm.
 */
static const tw_instr twinstr[] = {
256
     { TW_FULL, 1, 10 },
257
     { TW_NEXT, 1, 0 }
258
};
259
260
/*
 * Descriptor passed to the registration call below: the value 10, the name
 * "hf_10", the twinstr table, &GENUS, and the counts {48, 18, 54, 0}.  The
 * first three counts equal the "48 additions, 18 multiplications, 54 fused
 * multiply/add" figures quoted in the generator comment for this branch.
 * NOTE(review): the hc2hc_desc field names and GENUS are declared elsewhere;
 * the meaning of the trailing 0 is not visible here.
 */
static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, { 48, 18, 54, 0 } };
261
262
/*
 * Entry point for this file: hands hf_10 and its descriptor to
 * X(khc2hc_register) together with the caller-supplied planner p.
 * X() is a name-wrapping macro defined outside this file.
 */
void X(codelet_hf_10) (planner *p) {
263
     X(khc2hc_register) (p, hf_10, &desc);
264
}
265
#else
266
267
/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include rdft/scalar/hf.h */
268
269
/*
270
 * This function contains 102 FP additions, 60 FP multiplications,
271
 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
272
 * 45 stack variables, 4 constants, and 40 memory accesses
273
 */
274
#include "rdft/scalar/hf.h"
275
276
/*
 * hf_10 (non-FMA build): size-10 codelet produced by genfft's gen_hc2hc with
 * "-n 10 -dit" (see the "Generated by" comment above).
 *
 * For every m in [mb, me) the loop body
 *   - reads cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..9;
 *   - leaves element 0 untouched and combines each element k = 1..9 with the
 *     coefficient pair (W[2k-2], W[2k-1]) as
 *         re = W[2k-2] * cr_k + W[2k-1] * ci_k
 *         im = W[2k-2] * ci_k - W[2k-1] * cr_k
 *   - mixes the ten resulting pairs and stores 20 results back into the same
 *     cr/ci slots (the transform is in place).
 *
 * Parameters:
 *   cr, ci  arrays that are both read and written; after each iteration cr is
 *           advanced by +ms and ci by -ms.
 *   W       coefficient table; 18 values are consumed per iteration and the
 *           pointer is pre-offset by (mb - 1) * 18 before the first one.
 *   rs      stride handed to WS() to locate the ten elements.
 *   mb, me  half-open range of the loop counter m.
 *   ms      per-iteration step applied to cr and ci.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * are declared in FFTW headers that are not visible here.  The formulas above
 * assume FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm against the
 * headers.
 */
static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
277
0
{
278
0
     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
279
0
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
280
0
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
281
0
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
282
0
     {
283
0
    INT m;
284
0
    for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
285
0
         E T7, T1R, TT, T1C, TF, TQ, TR, T1o, T1p, T1P, TX, TY, TZ, T1d, T1g;
286
0
         E T1x, Ti, Tt, Tu, T1r, T1s, T1O, TU, TV, TW, T16, T19, T1y;
287
0
         { /* element 0 as-is, element 5 combined with (W[8], W[9]); sums and differences of the two */
288
0
        E T1, T1A, T6, T1B;
289
0
        T1 = cr[0];
290
0
        T1A = ci[0];
291
0
        {
292
0
       E T3, T5, T2, T4;
293
0
       T3 = cr[WS(rs, 5)];
294
0
       T5 = ci[WS(rs, 5)];
295
0
       T2 = W[8];
296
0
       T4 = W[9];
297
0
       T6 = FMA(T2, T3, T4 * T5);
298
0
       T1B = FNMS(T4, T3, T2 * T5);
299
0
        }
300
0
        T7 = T1 - T6;
301
0
        T1R = T1B + T1A;
302
0
        TT = T1 + T6;
303
0
        T1C = T1A - T1B;
304
0
         }
305
0
         { /* elements 4, 1, 9 and 6, each combined with its coefficient pair, then paired up */
306
0
        E Tz, T1b, TP, T1e, TE, T1c, TK, T1f;
307
0
        {
308
0
       E Tw, Ty, Tv, Tx;
309
0
       Tw = cr[WS(rs, 4)];
310
0
       Ty = ci[WS(rs, 4)];
311
0
       Tv = W[6];
312
0
       Tx = W[7];
313
0
       Tz = FMA(Tv, Tw, Tx * Ty);
314
0
       T1b = FNMS(Tx, Tw, Tv * Ty);
315
0
        }
316
0
        {
317
0
       E TM, TO, TL, TN;
318
0
       TM = cr[WS(rs, 1)];
319
0
       TO = ci[WS(rs, 1)];
320
0
       TL = W[0];
321
0
       TN = W[1];
322
0
       TP = FMA(TL, TM, TN * TO);
323
0
       T1e = FNMS(TN, TM, TL * TO);
324
0
        }
325
0
        {
326
0
       E TB, TD, TA, TC;
327
0
       TB = cr[WS(rs, 9)];
328
0
       TD = ci[WS(rs, 9)];
329
0
       TA = W[16];
330
0
       TC = W[17];
331
0
       TE = FMA(TA, TB, TC * TD);
332
0
       T1c = FNMS(TC, TB, TA * TD);
333
0
        }
334
0
        {
335
0
       E TH, TJ, TG, TI;
336
0
       TH = cr[WS(rs, 6)];
337
0
       TJ = ci[WS(rs, 6)];
338
0
       TG = W[10];
339
0
       TI = W[11];
340
0
       TK = FMA(TG, TH, TI * TJ);
341
0
       T1f = FNMS(TI, TH, TG * TJ);
342
0
        }
343
0
        TF = Tz - TE;
344
0
        TQ = TK - TP;
345
0
        TR = TF + TQ;
346
0
        T1o = T1b + T1c;
347
0
        T1p = T1f + T1e;
348
0
        T1P = T1o + T1p;
349
0
        TX = Tz + TE;
350
0
        TY = TK + TP;
351
0
        TZ = TX + TY;
352
0
        T1d = T1b - T1c;
353
0
        T1g = T1e - T1f;
354
0
        T1x = T1g - T1d;
355
0
         }
356
0
         { /* elements 2, 3, 7 and 8, each combined with its coefficient pair, then paired up */
357
0
        E Tc, T14, Ts, T18, Th, T15, Tn, T17;
358
0
        {
359
0
       E T9, Tb, T8, Ta;
360
0
       T9 = cr[WS(rs, 2)];
361
0
       Tb = ci[WS(rs, 2)];
362
0
       T8 = W[2];
363
0
       Ta = W[3];
364
0
       Tc = FMA(T8, T9, Ta * Tb);
365
0
       T14 = FNMS(Ta, T9, T8 * Tb);
366
0
        }
367
0
        {
368
0
       E Tp, Tr, To, Tq;
369
0
       Tp = cr[WS(rs, 3)];
370
0
       Tr = ci[WS(rs, 3)];
371
0
       To = W[4];
372
0
       Tq = W[5];
373
0
       Ts = FMA(To, Tp, Tq * Tr);
374
0
       T18 = FNMS(Tq, Tp, To * Tr);
375
0
        }
376
0
        {
377
0
       E Te, Tg, Td, Tf;
378
0
       Te = cr[WS(rs, 7)];
379
0
       Tg = ci[WS(rs, 7)];
380
0
       Td = W[12];
381
0
       Tf = W[13];
382
0
       Th = FMA(Td, Te, Tf * Tg);
383
0
       T15 = FNMS(Tf, Te, Td * Tg);
384
0
        }
385
0
        {
386
0
       E Tk, Tm, Tj, Tl;
387
0
       Tk = cr[WS(rs, 8)];
388
0
       Tm = ci[WS(rs, 8)];
389
0
       Tj = W[14];
390
0
       Tl = W[15];
391
0
       Tn = FMA(Tj, Tk, Tl * Tm);
392
0
       T17 = FNMS(Tl, Tk, Tj * Tm);
393
0
        }
394
0
        Ti = Tc - Th;
395
0
        Tt = Tn - Ts;
396
0
        Tu = Ti + Tt;
397
0
        T1r = T14 + T15;
398
0
        T1s = T17 + T18;
399
0
        T1O = T1r + T1s;
400
0
        TU = Tc + Th;
401
0
        TV = Tn + Ts;
402
0
        TW = TU + TV;
403
0
        T16 = T14 - T15;
404
0
        T19 = T17 - T18;
405
0
        T1y = T16 + T19;
406
0
         }
407
0
         { /* stores ci[4], ci[2], cr[3], ci[0], cr[1] */
408
0
        E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13;
409
0
        T11 = KP559016994 * (Tu - TR);
410
0
        TS = Tu + TR;
411
0
        T12 = FNMS(KP250000000, TS, T7);
412
0
        T1a = T16 - T19;
413
0
        T1h = T1d + T1g;
414
0
        T1i = FMA(KP951056516, T1a, KP587785252 * T1h);
415
0
        T1k = FNMS(KP587785252, T1a, KP951056516 * T1h);
416
0
        ci[WS(rs, 4)] = T7 + TS;
417
0
        T1j = T12 - T11;
418
0
        ci[WS(rs, 2)] = T1j - T1k;
419
0
        cr[WS(rs, 3)] = T1j + T1k;
420
0
        T13 = T11 + T12;
421
0
        ci[0] = T13 - T1i;
422
0
        cr[WS(rs, 1)] = T13 + T1i;
423
0
         }
424
0
         { /* stores cr[0], cr[4], ci[3], cr[2], ci[1] */
425
0
        E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n;
426
0
        T1m = KP559016994 * (TW - TZ);
427
0
        T10 = TW + TZ;
428
0
        T1l = FNMS(KP250000000, T10, TT);
429
0
        T1q = T1o - T1p;
430
0
        T1t = T1r - T1s;
431
0
        T1u = FNMS(KP587785252, T1t, KP951056516 * T1q);
432
0
        T1w = FMA(KP951056516, T1t, KP587785252 * T1q);
433
0
        cr[0] = TT + T10;
434
0
        T1v = T1m + T1l;
435
0
        cr[WS(rs, 4)] = T1v - T1w;
436
0
        ci[WS(rs, 3)] = T1v + T1w;
437
0
        T1n = T1l - T1m;
438
0
        cr[WS(rs, 2)] = T1n - T1u;
439
0
        ci[WS(rs, 1)] = T1n + T1u;
440
0
         }
441
0
         { /* stores cr[5], cr[9], ci[8], cr[7], ci[6] */
442
0
        E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
443
0
        T1H = KP559016994 * (T1y + T1x);
444
0
        T1z = T1x - T1y;
445
0
        T1G = FMA(KP250000000, T1z, T1C);
446
0
        T1D = Ti - Tt;
447
0
        T1E = TQ - TF;
448
0
        T1F = FMA(KP587785252, T1D, KP951056516 * T1E);
449
0
        T1J = FNMS(KP951056516, T1D, KP587785252 * T1E);
450
0
        cr[WS(rs, 5)] = T1z - T1C;
451
0
        T1K = T1H + T1G;
452
0
        cr[WS(rs, 9)] = T1J - T1K;
453
0
        ci[WS(rs, 8)] = T1J + T1K;
454
0
        T1I = T1G - T1H;
455
0
        cr[WS(rs, 7)] = T1F - T1I;
456
0
        ci[WS(rs, 6)] = T1F + T1I;
457
0
         }
458
0
         { /* stores ci[9], cr[8], ci[7], cr[6], ci[5] */
459
0
        E T1Q, T1S, T1T, T1N, T1V, T1L, T1M, T1W, T1U;
460
0
        T1Q = KP559016994 * (T1O - T1P);
461
0
        T1S = T1O + T1P;
462
0
        T1T = FNMS(KP250000000, T1S, T1R);
463
0
        T1L = TU - TV;
464
0
        T1M = TX - TY;
465
0
        T1N = FMA(KP951056516, T1L, KP587785252 * T1M);
466
0
        T1V = FNMS(KP587785252, T1L, KP951056516 * T1M);
467
0
        ci[WS(rs, 9)] = T1S + T1R;
468
0
        T1W = T1T - T1Q;
469
0
        cr[WS(rs, 8)] = T1V - T1W;
470
0
        ci[WS(rs, 7)] = T1V + T1W;
471
0
        T1U = T1Q + T1T;
472
0
        cr[WS(rs, 6)] = T1N - T1U;
473
0
        ci[WS(rs, 5)] = T1N + T1U;
474
0
         }
475
0
    }
476
0
     }
477
0
}
478
479
/*
 * Two-entry tw_instr table that the codelet descriptor below points at.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in FFTW headers not
 * visible here; the 10 in the first entry presumably matches the codelet size
 * and TW_NEXT presumably terminates the list -- confirm.
 */
static const tw_instr twinstr[] = {
480
     { TW_FULL, 1, 10 },
481
     { TW_NEXT, 1, 0 }
482
};
483
484
/*
 * Descriptor passed to the registration call below: the value 10, the name
 * "hf_10", the twinstr table, &GENUS, and the counts {72, 30, 30, 0}.  The
 * first three counts equal the "72 additions, 30 multiplications, 30 fused
 * multiply/add" figures quoted in the generator comment for this branch.
 * NOTE(review): the hc2hc_desc field names and GENUS are declared elsewhere;
 * the meaning of the trailing 0 is not visible here.
 */
static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, { 72, 30, 30, 0 } };
485
486
1
/*
 * Entry point for this file: hands hf_10 and its descriptor to
 * X(khc2hc_register) together with the caller-supplied planner p.
 * X() is a name-wrapping macro defined outside this file.
 */
void X(codelet_hf_10) (planner *p) {
487
1
     X(khc2hc_register) (p, hf_10, &desc);
488
1
}
489
#endif