Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/fft.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
12
#include "definitions.h"
13
#include "fft_common.h"
14
15
static INLINE void simple_transpose(const float* A, float* b, int32_t n) {
16
    for (int32_t y = 0; y < n; y++) {
17
        for (int32_t x = 0; x < n; x++) {
18
            b[y * n + x] = A[x * n + y];
19
        }
20
    }
21
}
22
23
// The 1d transform is real to complex and packs the complex results in
24
// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
25
// components, followed by the n/2 - 1 imaginary components). After the
26
// transform is done on the rows, the first n/2 + 1 columns are real, and
27
// the remaining are the imaginary components. After the transform on the
28
// columns, the region of [0, n/2]x[0, n/2] contains the real part of
29
// fft of the real columns. The real part of the 2d fft also includes the
30
// imaginary part of transformed imaginary columns. This function assembles
31
// the correct outputs while putting the real and imaginary components
32
// next to each other.
33
static INLINE void unpack_2d_output(const float* col_fft, float* output, int32_t n) {
34
    for (int32_t y = 0; y <= n / 2; ++y) {
35
        const int32_t y2      = y + n / 2;
36
        const int32_t y_extra = y2 > n / 2 && y2 < n;
37
38
        for (int32_t x = 0; x <= n / 2; ++x) {
39
            const int32_t x2            = x + n / 2;
40
            const int32_t x_extra       = x2 > n / 2 && x2 < n;
41
            output[2 * (y * n + x)]     = col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
42
            output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) + (x_extra ? col_fft[y * n + x2] : 0);
43
            if (y_extra) {
44
                output[2 * ((n - y) * n + x)]     = col_fft[y * n + x] + (x_extra ? col_fft[y2 * n + x2] : 0);
45
                output[2 * ((n - y) * n + x) + 1] = -col_fft[y2 * n + x] + (x_extra ? col_fft[y * n + x2] : 0);
46
            }
47
        }
48
    }
49
}
50
51
void svt_aom_fft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc tform,
52
0
                        AomFftTransposeFunc transpose, AomFftUnpackFunc unpack, int32_t vec_size) {
53
0
    for (int32_t x = 0; x < n; x += vec_size) {
54
0
        tform(input + x, output + x, n);
55
0
    }
56
0
    transpose(output, temp, n);
57
58
0
    for (int32_t x = 0; x < n; x += vec_size) {
59
0
        tform(temp + x, output + x, n);
60
0
    }
61
0
    transpose(output, temp, n);
62
63
0
    unpack(temp, output, n);
64
0
}
65
66
static INLINE void store_float(float* output, float input) {
67
    *output = input;
68
}
69
70
static INLINE float add_float(float a, float b) {
71
    return a + b;
72
}
73
74
static INLINE float sub_float(float a, float b) {
75
    return a - b;
76
}
77
78
static INLINE float mul_float(float a, float b) {
79
    return a * b;
80
}
81
82
GEN_FFT_2(void, float, float, float, *, store_float);
83
GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
84
GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
85
GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
86
GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
87
88
0
void svt_aom_fft2x2_float_c(const float* input, float* temp, float* output) {
89
0
    svt_aom_fft_2d_gen(input, temp, output, 2, svt_aom_fft1d_2_float, simple_transpose, unpack_2d_output, 1);
90
0
}
91
92
0
void svt_aom_fft4x4_float_c(const float* input, float* temp, float* output) {
93
0
    svt_aom_fft_2d_gen(input, temp, output, 4, svt_aom_fft1d_4_float, simple_transpose, unpack_2d_output, 1);
94
0
}
95
96
0
void svt_aom_fft8x8_float_c(const float* input, float* temp, float* output) {
97
0
    svt_aom_fft_2d_gen(input, temp, output, 8, svt_aom_fft1d_8_float, simple_transpose, unpack_2d_output, 1);
98
0
}
99
100
0
void svt_aom_fft16x16_float_c(const float* input, float* temp, float* output) {
101
0
    svt_aom_fft_2d_gen(input, temp, output, 16, svt_aom_fft1d_16_float, simple_transpose, unpack_2d_output, 1);
102
0
}
103
104
0
void svt_aom_fft32x32_float_c(const float* input, float* temp, float* output) {
105
0
    svt_aom_fft_2d_gen(input, temp, output, 32, svt_aom_fft1d_32_float, simple_transpose, unpack_2d_output, 1);
106
0
}
107
108
void svt_aom_ifft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc fft_single,
109
                         AomFft1dFunc fft_multi, AomFft1dFunc ifft_multi, AomFftTransposeFunc transpose,
110
0
                         int32_t vec_size) {
111
    // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
112
    // and get real outputs.
113
0
    for (int32_t y = 0; y <= n / 2; ++y) {
114
0
        output[y * n]     = input[2 * y * n];
115
0
        output[y * n + 1] = input[2 * (y * n + n / 2)];
116
0
    }
117
0
    for (int32_t y = n / 2 + 1; y < n; ++y) {
118
0
        output[y * n]     = input[2 * (y - n / 2) * n + 1];
119
0
        output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
120
0
    }
121
122
0
    for (int32_t i = 0; i < 2; i += vec_size) {
123
0
        ifft_multi(output + i, temp + i, n);
124
0
    }
125
    // For the other columns, since we don't have a full ifft for complex inputs
126
    // we have to split them into the real and imaginary counterparts.
127
    // Pack the real component, then the imaginary components.
128
0
    for (int32_t y = 0; y < n; ++y) {
129
0
        for (int32_t x = 1; x < n / 2; ++x) {
130
0
            output[y * n + (x + 1)] = input[2 * (y * n + x)];
131
0
        }
132
0
        for (int32_t x = 1; x < n / 2; ++x) {
133
0
            output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
134
0
        }
135
0
    }
136
0
    for (int32_t y = 2; y < vec_size; y++) {
137
0
        fft_single(output + y, temp + y, n);
138
0
    }
139
    // This is the part that can be sped up with SIMD
140
0
    for (int32_t y = AOMMAX(2, vec_size); y < n; y += vec_size) {
141
0
        fft_multi(output + y, temp + y, n);
142
0
    }
143
    // Put the 0 and n/2 th results in the correct place.
144
0
    for (int32_t x = 0; x < n; ++x) {
145
0
        output[x]               = temp[x * n];
146
0
        output[(n / 2) * n + x] = temp[x * n + 1];
147
0
    }
148
    // This rearranges and transposes.
149
0
    for (int32_t y = 1; y < n / 2; ++y) {
150
        // Fill in the real columns
151
0
        for (int32_t x = 0; x <= n / 2; ++x) {
152
0
            output[x + y * n] = temp[(y + 1) + x * n] +
153
0
                ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
154
0
        }
155
0
        for (int32_t x = n / 2 + 1; x < n; ++x) {
156
0
            output[x + y * n] = temp[(y + 1) + (n - x) * n] - temp[(y + n / 2) + ((n - x) + n / 2) * n];
157
0
        }
158
        // Fill in the imag columns
159
0
        for (int32_t x = 0; x <= n / 2; ++x) {
160
0
            output[x + (y + n / 2) * n] = temp[(y + n / 2) + x * n] -
161
0
                ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
162
0
        }
163
0
        for (int32_t x = n / 2 + 1; x < n; ++x) {
164
0
            output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] + temp[(y + n / 2) + (n - x) * n];
165
0
        }
166
0
    }
167
0
    for (int32_t y = 0; y < n; y += vec_size) {
168
0
        ifft_multi(output + y, temp + y, n);
169
0
    }
170
0
    transpose(temp, output, n);
171
0
}
172
173
GEN_IFFT_2(void, float, float, float, *, store_float);
174
GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
175
GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
176
GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
177
GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
178
179
0
void svt_aom_ifft2x2_float_c(const float* input, float* temp, float* output) {
180
0
    svt_aom_ifft_2d_gen(input,
181
0
                        temp,
182
0
                        output,
183
0
                        2,
184
0
                        svt_aom_fft1d_2_float,
185
0
                        svt_aom_fft1d_2_float,
186
0
                        svt_aom_ifft1d_2_float,
187
0
                        simple_transpose,
188
0
                        1);
189
0
}
190
191
0
void svt_aom_ifft4x4_float_c(const float* input, float* temp, float* output) {
192
0
    svt_aom_ifft_2d_gen(input,
193
0
                        temp,
194
0
                        output,
195
0
                        4,
196
0
                        svt_aom_fft1d_4_float,
197
0
                        svt_aom_fft1d_4_float,
198
0
                        svt_aom_ifft1d_4_float,
199
0
                        simple_transpose,
200
0
                        1);
201
0
}
202
203
0
void svt_aom_ifft8x8_float_c(const float* input, float* temp, float* output) {
204
0
    svt_aom_ifft_2d_gen(input,
205
0
                        temp,
206
0
                        output,
207
0
                        8,
208
0
                        svt_aom_fft1d_8_float,
209
0
                        svt_aom_fft1d_8_float,
210
0
                        svt_aom_ifft1d_8_float,
211
0
                        simple_transpose,
212
0
                        1);
213
0
}
214
215
0
void svt_aom_ifft16x16_float_c(const float* input, float* temp, float* output) {
216
0
    svt_aom_ifft_2d_gen(input,
217
0
                        temp,
218
0
                        output,
219
0
                        16,
220
0
                        svt_aom_fft1d_16_float,
221
0
                        svt_aom_fft1d_16_float,
222
0
                        svt_aom_ifft1d_16_float,
223
0
                        simple_transpose,
224
0
                        1);
225
0
}
226
227
0
void svt_aom_ifft32x32_float_c(const float* input, float* temp, float* output) {
228
0
    svt_aom_ifft_2d_gen(input,
229
0
                        temp,
230
0
                        output,
231
0
                        32,
232
0
                        svt_aom_fft1d_32_float,
233
0
                        svt_aom_fft1d_32_float,
234
0
                        svt_aom_ifft1d_32_float,
235
0
                        simple_transpose,
236
0
                        1);
237
0
}