/work/svt-av1/Source/Lib/Codec/fft.c

Source
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
 */

#include "definitions.h"
#include "fft_common.h"

static INLINE void simple_transpose(const float* A, float* b, int32_t n) {
    for (int32_t y = 0; y < n; y++) {
        for (int32_t x = 0; x < n; x++) {
            b[y * n + x] = A[x * n + y];
        }
    }
}

// The 1d transform is real to complex and packs the complex results in
// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
// components, followed by the n/2 - 1 imaginary components). After the
// transform is done on the rows, the first n/2 + 1 columns are real, and
// the remaining are the imaginary components. After the transform on the
// columns, the region of [0, n/2]x[0, n/2] contains the real part of
// fft of the real columns. The real part of the 2d fft also includes the
// imaginary part of transformed imaginary columns. This function assembles
// the correct outputs while putting the real and imaginary components
// next to each other.
static INLINE void unpack_2d_output(const float* col_fft, float* output, int32_t n) {
    for (int32_t y = 0; y <= n / 2; ++y) {
        const int32_t y2      = y + n / 2;
        const int32_t y_extra = y2 > n / 2 && y2 < n;

        for (int32_t x = 0; x <= n / 2; ++x) {
            const int32_t x2            = x + n / 2;
            const int32_t x_extra       = x2 > n / 2 && x2 < n;
            output[2 * (y * n + x)]     = col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
            output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) + (x_extra ? col_fft[y * n + x2] : 0);
            if (y_extra) {
                output[2 * ((n - y) * n + x)]     = col_fft[y * n + x] + (x_extra ? col_fft[y2 * n + x2] : 0);
                output[2 * ((n - y) * n + x) + 1] = -col_fft[y2 * n + x] + (x_extra ? col_fft[y * n + x2] : 0);
            }
        }
    }
}

void svt_aom_fft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc tform,
                        AomFftTransposeFunc transpose, AomFftUnpackFunc unpack, int32_t vec_size) {
    for (int32_t x = 0; x < n; x += vec_size) {
        tform(input + x, output + x, n);
    }
    transpose(output, temp, n);

    for (int32_t x = 0; x < n; x += vec_size) {
        tform(temp + x, output + x, n);
    }
    transpose(output, temp, n);

    unpack(temp, output, n);
}

static INLINE void store_float(float* output, float input) {
    *output = input;
}

static INLINE float add_float(float a, float b) {
    return a + b;
}

static INLINE float sub_float(float a, float b) {
    return a - b;
}

static INLINE float mul_float(float a, float b) {
    return a * b;
}

GEN_FFT_2(void, float, float, float, *, store_float);
GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);

void svt_aom_fft2x2_float_c(const float* input, float* temp, float* output) {
    svt_aom_fft_2d_gen(input, temp, output, 2, svt_aom_fft1d_2_float, simple_transpose, unpack_2d_output, 1);
}

void svt_aom_fft4x4_float_c(const float* input, float* temp, float* output) {
    svt_aom_fft_2d_gen(input, temp, output, 4, svt_aom_fft1d_4_float, simple_transpose, unpack_2d_output, 1);
}

void svt_aom_fft8x8_float_c(const float* input, float* temp, float* output) {
    svt_aom_fft_2d_gen(input, temp, output, 8, svt_aom_fft1d_8_float, simple_transpose, unpack_2d_output, 1);
}

void svt_aom_fft16x16_float_c(const float* input, float* temp, float* output) {
    svt_aom_fft_2d_gen(input, temp, output, 16, svt_aom_fft1d_16_float, simple_transpose, unpack_2d_output, 1);
}

void svt_aom_fft32x32_float_c(const float* input, float* temp, float* output) {
    svt_aom_fft_2d_gen(input, temp, output, 32, svt_aom_fft1d_32_float, simple_transpose, unpack_2d_output, 1);
}

void svt_aom_ifft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc fft_single,
                         AomFft1dFunc fft_multi, AomFft1dFunc ifft_multi, AomFftTransposeFunc transpose,
                         int32_t vec_size) {
    // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
    // and get real outputs.
    for (int32_t y = 0; y <= n / 2; ++y) {
        output[y * n]     = input[2 * y * n];
        output[y * n + 1] = input[2 * (y * n + n / 2)];
    }
    for (int32_t y = n / 2 + 1; y < n; ++y) {
        output[y * n]     = input[2 * (y - n / 2) * n + 1];
        output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
    }

    for (int32_t i = 0; i < 2; i += vec_size) {
        ifft_multi(output + i, temp + i, n);
    }
    // For the other columns, since we don't have a full ifft for complex inputs
    // we have to split them into the real and imaginary counterparts.
    // Pack the real component, then the imaginary components.
    for (int32_t y = 0; y < n; ++y) {
        for (int32_t x = 1; x < n / 2; ++x) {
            output[y * n + (x + 1)] = input[2 * (y * n + x)];
        }
        for (int32_t x = 1; x < n / 2; ++x) {
            output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
        }
    }
    for (int32_t y = 2; y < vec_size; y++) {
        fft_single(output + y, temp + y, n);
    }
    // This is the part that can be sped up with SIMD
    for (int32_t y = AOMMAX(2, vec_size); y < n; y += vec_size) {
        fft_multi(output + y, temp + y, n);
    }
    // Put the 0 and n/2 th results in the correct place.
    for (int32_t x = 0; x < n; ++x) {
        output[x]               = temp[x * n];
        output[(n / 2) * n + x] = temp[x * n + 1];
    }
    // This rearranges and transposes.
    for (int32_t y = 1; y < n / 2; ++y) {
        // Fill in the real columns
        for (int32_t x = 0; x <= n / 2; ++x) {
            output[x + y * n] = temp[(y + 1) + x * n] +
                ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
        }
        for (int32_t x = n / 2 + 1; x < n; ++x) {
            output[x + y * n] = temp[(y + 1) + (n - x) * n] - temp[(y + n / 2) + ((n - x) + n / 2) * n];
        }
        // Fill in the imag columns
        for (int32_t x = 0; x <= n / 2; ++x) {
            output[x + (y + n / 2) * n] = temp[(y + n / 2) + x * n] -
                ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
        }
        for (int32_t x = n / 2 + 1; x < n; ++x) {
            output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] + temp[(y + n / 2) + (n - x) * n];
        }
    }
    for (int32_t y = 0; y < n; y += vec_size) {
        ifft_multi(output + y, temp + y, n);
    }
    transpose(temp, output, n);
}

GEN_IFFT_2(void, float, float, float, *, store_float);
GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);

void svt_aom_ifft2x2_float_c(const float* input, float* temp, float* output) {
    svt_aom_ifft_2d_gen(input,
                        temp,
                        output,
                        2,
                        svt_aom_fft1d_2_float,
                        svt_aom_fft1d_2_float,
                        svt_aom_ifft1d_2_float,
                        simple_transpose,
                        1);
}

void svt_aom_ifft4x4_float_c(const float* input, float* temp, float* output) {
    svt_aom_ifft_2d_gen(input,
                        temp,
                        output,
                        4,
                        svt_aom_fft1d_4_float,
                        svt_aom_fft1d_4_float,
                        svt_aom_ifft1d_4_float,
                        simple_transpose,
                        1);
}

void svt_aom_ifft8x8_float_c(const float* input, float* temp, float* output) {
    svt_aom_ifft_2d_gen(input,
                        temp,
                        output,
                        8,
                        svt_aom_fft1d_8_float,
                        svt_aom_fft1d_8_float,
                        svt_aom_ifft1d_8_float,
                        simple_transpose,
                        1);
}

void svt_aom_ifft16x16_float_c(const float* input, float* temp, float* output) {
    svt_aom_ifft_2d_gen(input,
                        temp,
                        output,
                        16,
                        svt_aom_fft1d_16_float,
                        svt_aom_fft1d_16_float,
                        svt_aom_ifft1d_16_float,
                        simple_transpose,
                        1);
}

void svt_aom_ifft32x32_float_c(const float* input, float* temp, float* output) {
    svt_aom_ifft_2d_gen(input,
                        temp,
                        output,
                        32,
                        svt_aom_fft1d_32_float,
                        svt_aom_fft1d_32_float,
                        svt_aom_ifft1d_32_float,
                        simple_transpose,
                        1);
}

Coverage Report

Created: 2026-05-16 06:41

Line	Count	Source
1		/*
2		* Copyright (c) 2018, Alliance for Open Media. All rights reserved
3		*
4		* This source code is subject to the terms of the BSD 2 Clause License and
5		* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6		* was not distributed with this source code in the LICENSE file, you can
7		* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8		* Media Patent License 1.0 was not distributed with this source code in the
9		* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10		*/
11
12		#include "definitions.h"
13		#include "fft_common.h"
14
15		static INLINE void simple_transpose(const float* A, float* b, int32_t n) {
16		for (int32_t y = 0; y < n; y++) {
17		for (int32_t x = 0; x < n; x++) {
18		b[y * n + x] = A[x * n + y];
19		}
20		}
21		}
22
23		// The 1d transform is real to complex and packs the complex results in
24		// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
25		// components, followed by the n/2 - 1 imaginary components). After the
26		// transform is done on the rows, the first n/2 + 1 columns are real, and
27		// the remaining are the imaginary components. After the transform on the
28		// columns, the region of [0, n/2]x[0, n/2] contains the real part of
29		// fft of the real columns. The real part of the 2d fft also includes the
30		// imaginary part of transformed imaginary columns. This function assembles
31		// the correct outputs while putting the real and imaginary components
32		// next to each other.
33		static INLINE void unpack_2d_output(const float* col_fft, float* output, int32_t n) {
34		for (int32_t y = 0; y <= n / 2; ++y) {
35		const int32_t y2 = y + n / 2;
36		const int32_t y_extra = y2 > n / 2 && y2 < n;
37
38		for (int32_t x = 0; x <= n / 2; ++x) {
39		const int32_t x2 = x + n / 2;
40		const int32_t x_extra = x2 > n / 2 && x2 < n;
41		output[2 * (y * n + x)] = col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
42		output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) + (x_extra ? col_fft[y * n + x2] : 0);
43		if (y_extra) {
44		output[2 * ((n - y) * n + x)] = col_fft[y * n + x] + (x_extra ? col_fft[y2 * n + x2] : 0);
45		output[2 * ((n - y) * n + x) + 1] = -col_fft[y2 * n + x] + (x_extra ? col_fft[y * n + x2] : 0);
46		}
47		}
48		}
49		}
50
51		void svt_aom_fft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc tform,
52	0	AomFftTransposeFunc transpose, AomFftUnpackFunc unpack, int32_t vec_size) {
53	0	for (int32_t x = 0; x < n; x += vec_size) {
54	0	tform(input + x, output + x, n);
55	0	}
56	0	transpose(output, temp, n);
57
58	0	for (int32_t x = 0; x < n; x += vec_size) {
59	0	tform(temp + x, output + x, n);
60	0	}
61	0	transpose(output, temp, n);
62
63	0	unpack(temp, output, n);
64	0	}
65
66		static INLINE void store_float(float* output, float input) {
67		*output = input;
68		}
69
70		static INLINE float add_float(float a, float b) {
71		return a + b;
72		}
73
74		static INLINE float sub_float(float a, float b) {
75		return a - b;
76		}
77
78		static INLINE float mul_float(float a, float b) {
79		return a * b;
80		}
81
82		GEN_FFT_2(void, float, float, float, *, store_float);
83		GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
84		GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
85		GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
86		GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
87
88	0	void svt_aom_fft2x2_float_c(const float* input, float* temp, float* output) {
89	0	svt_aom_fft_2d_gen(input, temp, output, 2, svt_aom_fft1d_2_float, simple_transpose, unpack_2d_output, 1);
90	0	}
91
92	0	void svt_aom_fft4x4_float_c(const float* input, float* temp, float* output) {
93	0	svt_aom_fft_2d_gen(input, temp, output, 4, svt_aom_fft1d_4_float, simple_transpose, unpack_2d_output, 1);
94	0	}
95
96	0	void svt_aom_fft8x8_float_c(const float* input, float* temp, float* output) {
97	0	svt_aom_fft_2d_gen(input, temp, output, 8, svt_aom_fft1d_8_float, simple_transpose, unpack_2d_output, 1);
98	0	}
99
100	0	void svt_aom_fft16x16_float_c(const float* input, float* temp, float* output) {
101	0	svt_aom_fft_2d_gen(input, temp, output, 16, svt_aom_fft1d_16_float, simple_transpose, unpack_2d_output, 1);
102	0	}
103
104	0	void svt_aom_fft32x32_float_c(const float* input, float* temp, float* output) {
105	0	svt_aom_fft_2d_gen(input, temp, output, 32, svt_aom_fft1d_32_float, simple_transpose, unpack_2d_output, 1);
106	0	}
107
108		void svt_aom_ifft_2d_gen(const float* input, float* temp, float* output, int32_t n, AomFft1dFunc fft_single,
109		AomFft1dFunc fft_multi, AomFft1dFunc ifft_multi, AomFftTransposeFunc transpose,
110	0	int32_t vec_size) {
111		// Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
112		// and get real outputs.
113	0	for (int32_t y = 0; y <= n / 2; ++y) {
114	0	output[y * n] = input[2 * y * n];
115	0	output[y * n + 1] = input[2 * (y * n + n / 2)];
116	0	}
117	0	for (int32_t y = n / 2 + 1; y < n; ++y) {
118	0	output[y * n] = input[2 * (y - n / 2) * n + 1];
119	0	output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
120	0	}
121
122	0	for (int32_t i = 0; i < 2; i += vec_size) {
123	0	ifft_multi(output + i, temp + i, n);
124	0	}
125		// For the other columns, since we don't have a full ifft for complex inputs
126		// we have to split them into the real and imaginary counterparts.
127		// Pack the real component, then the imaginary components.
128	0	for (int32_t y = 0; y < n; ++y) {
129	0	for (int32_t x = 1; x < n / 2; ++x) {
130	0	output[y * n + (x + 1)] = input[2 * (y * n + x)];
131	0	}
132	0	for (int32_t x = 1; x < n / 2; ++x) {
133	0	output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
134	0	}
135	0	}
136	0	for (int32_t y = 2; y < vec_size; y++) {
137	0	fft_single(output + y, temp + y, n);
138	0	}
139		// This is the part that can be sped up with SIMD
140	0	for (int32_t y = AOMMAX(2, vec_size); y < n; y += vec_size) {
141	0	fft_multi(output + y, temp + y, n);
142	0	}
143		// Put the 0 and n/2 th results in the correct place.
144	0	for (int32_t x = 0; x < n; ++x) {
145	0	output[x] = temp[x * n];
146	0	output[(n / 2) * n + x] = temp[x * n + 1];
147	0	}
148		// This rearranges and transposes.
149	0	for (int32_t y = 1; y < n / 2; ++y) {
150		// Fill in the real columns
151	0	for (int32_t x = 0; x <= n / 2; ++x) {
152	0	output[x + y * n] = temp[(y + 1) + x * n] +
153	0	((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
154	0	}
155	0	for (int32_t x = n / 2 + 1; x < n; ++x) {
156	0	output[x + y * n] = temp[(y + 1) + (n - x) * n] - temp[(y + n / 2) + ((n - x) + n / 2) * n];
157	0	}
158		// Fill in the imag columns
159	0	for (int32_t x = 0; x <= n / 2; ++x) {
160	0	output[x + (y + n / 2) * n] = temp[(y + n / 2) + x * n] -
161	0	((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
162	0	}
163	0	for (int32_t x = n / 2 + 1; x < n; ++x) {
164	0	output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] + temp[(y + n / 2) + (n - x) * n];
165	0	}
166	0	}
167	0	for (int32_t y = 0; y < n; y += vec_size) {
168	0	ifft_multi(output + y, temp + y, n);
169	0	}
170	0	transpose(temp, output, n);
171	0	}
172
173		GEN_IFFT_2(void, float, float, float, *, store_float);
174		GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float, sub_float);
175		GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
176		GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
177		GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float, sub_float, mul_float);
178
179	0	void svt_aom_ifft2x2_float_c(const float* input, float* temp, float* output) {
180	0	svt_aom_ifft_2d_gen(input,
181	0	temp,
182	0	output,
183	0	2,
184	0	svt_aom_fft1d_2_float,
185	0	svt_aom_fft1d_2_float,
186	0	svt_aom_ifft1d_2_float,
187	0	simple_transpose,
188	0	1);
189	0	}
190
191	0	void svt_aom_ifft4x4_float_c(const float* input, float* temp, float* output) {
192	0	svt_aom_ifft_2d_gen(input,
193	0	temp,
194	0	output,
195	0	4,
196	0	svt_aom_fft1d_4_float,
197	0	svt_aom_fft1d_4_float,
198	0	svt_aom_ifft1d_4_float,
199	0	simple_transpose,
200	0	1);
201	0	}
202
203	0	void svt_aom_ifft8x8_float_c(const float* input, float* temp, float* output) {
204	0	svt_aom_ifft_2d_gen(input,
205	0	temp,
206	0	output,
207	0	8,
208	0	svt_aom_fft1d_8_float,
209	0	svt_aom_fft1d_8_float,
210	0	svt_aom_ifft1d_8_float,
211	0	simple_transpose,
212	0	1);
213	0	}
214
215	0	void svt_aom_ifft16x16_float_c(const float* input, float* temp, float* output) {
216	0	svt_aom_ifft_2d_gen(input,
217	0	temp,
218	0	output,
219	0	16,
220	0	svt_aom_fft1d_16_float,
221	0	svt_aom_fft1d_16_float,
222	0	svt_aom_ifft1d_16_float,
223	0	simple_transpose,
224	0	1);
225	0	}
226
227	0	void svt_aom_ifft32x32_float_c(const float* input, float* temp, float* output) {
228	0	svt_aom_ifft_2d_gen(input,
229	0	temp,
230	0	output,
231	0	32,
232	0	svt_aom_fft1d_32_float,
233	0	svt_aom_fft1d_32_float,
234	0	svt_aom_ifft1d_32_float,
235	0	simple_transpose,
236	0	1);
237	0	}