/src/aom/aom_dsp/fft.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/fft_common.h"
#include "config/aom_dsp_rtcd.h"

static inline void simple_transpose(const float *A, float *B, int n) {
  for (int y = 0; y < n; y++) {
    for (int x = 0; x < n; x++) {
      B[y * n + x] = A[x * n + y];
    }
  }
}

// The 1d transform is real to complex and packs the complex results in
// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
// components, followed by the n/2 - 1 imaginary components). After the
// transform is done on the rows, the first n/2 + 1 columns are real, and
// the remaining are the imaginary components. After the transform on the
// columns, the region of [0, n/2]x[0, n/2] contains the real part of
// fft of the real columns. The real part of the 2d fft also includes the
// imaginary part of transformed imaginary columns. This function assembles
// the correct outputs while putting the real and imaginary components
// next to each other.
static inline void unpack_2d_output(const float *col_fft, float *output,
                                    int n) {
  for (int y = 0; y <= n / 2; ++y) {
    const int y2 = y + n / 2;
    const int y_extra = y2 > n / 2 && y2 < n;

    for (int x = 0; x <= n / 2; ++x) {
      const int x2 = x + n / 2;
      const int x_extra = x2 > n / 2 && x2 < n;
      output[2 * (y * n + x)] =
          col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
      output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
                                    (x_extra ? col_fft[y * n + x2] : 0);
      if (y_extra) {
        output[2 * ((n - y) * n + x)] =
            col_fft[y * n + x] +
            (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
        output[2 * ((n - y) * n + x) + 1] =
            -(y_extra ? col_fft[y2 * n + x] : 0) +
            (x_extra ? col_fft[y * n + x2] : 0);
      }
    }
  }
}

void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
                    aom_fft_unpack_func_t unpack, int vec_size) {
  for (int x = 0; x < n; x += vec_size) {
    tform(input + x, output + x, n);
  }
  transpose(output, temp, n);

  for (int x = 0; x < n; x += vec_size) {
    tform(temp + x, output + x, n);
  }
  transpose(output, temp, n);

  unpack(temp, output, n);
}

static inline void store_float(float *output, float input) { *output = input; }
static inline float add_float(float a, float b) { return a + b; }
static inline float sub_float(float a, float b) { return a - b; }
static inline float mul_float(float a, float b) { return a * b; }

GEN_FFT_2(void, float, float, float, *, store_float)
GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
          sub_float)
GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
          sub_float, mul_float)
GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
           sub_float, mul_float)
GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
           sub_float, mul_float)

void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
  aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
                 unpack_2d_output, 1);
}

void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
  aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
                 unpack_2d_output, 1);
}

void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
  aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
                 unpack_2d_output, 1);
}

void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
  aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
                 unpack_2d_output, 1);
}

void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
  aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
                 unpack_2d_output, 1);
}

void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
                     aom_fft_1d_func_t ifft_multi,
                     aom_fft_transpose_func_t transpose, int vec_size) {
  // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
  // and get real outputs.
  for (int y = 0; y <= n / 2; ++y) {
    output[y * n] = input[2 * y * n];
    output[y * n + 1] = input[2 * (y * n + n / 2)];
  }
  for (int y = n / 2 + 1; y < n; ++y) {
    output[y * n] = input[2 * (y - n / 2) * n + 1];
    output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
  }

  for (int i = 0; i < 2; i += vec_size) {
    ifft_multi(output + i, temp + i, n);
  }

  // For the other columns, since we don't have a full ifft for complex inputs
  // we have to split them into the real and imaginary counterparts.
  // Pack the real component, then the imaginary components.
  for (int y = 0; y < n; ++y) {
    for (int x = 1; x < n / 2; ++x) {
      output[y * n + (x + 1)] = input[2 * (y * n + x)];
    }
    for (int x = 1; x < n / 2; ++x) {
      output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
    }
  }
  for (int y = 2; y < vec_size; y++) {
    fft_single(output + y, temp + y, n);
  }
  // This is the part that can be sped up with SIMD
  for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
    fft_multi(output + y, temp + y, n);
  }

  // Put the 0 and n/2 th results in the correct place.
  for (int x = 0; x < n; ++x) {
    output[x] = temp[x * n];
    output[(n / 2) * n + x] = temp[x * n + 1];
  }
  // This rearranges and transposes.
  for (int y = 1; y < n / 2; ++y) {
    // Fill in the real columns
    for (int x = 0; x <= n / 2; ++x) {
      output[x + y * n] =
          temp[(y + 1) + x * n] +
          ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
    }
    for (int x = n / 2 + 1; x < n; ++x) {
      output[x + y * n] = temp[(y + 1) + (n - x) * n] -
                          temp[(y + n / 2) + ((n - x) + n / 2) * n];
    }
    // Fill in the imag columns
    for (int x = 0; x <= n / 2; ++x) {
      output[x + (y + n / 2) * n] =
          temp[(y + n / 2) + x * n] -
          ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
    }
    for (int x = n / 2 + 1; x < n; ++x) {
      output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
                                    temp[(y + n / 2) + (n - x) * n];
    }
  }
  for (int y = 0; y < n; y += vec_size) {
    ifft_multi(output + y, temp + y, n);
  }
  transpose(temp, output, n);
}

GEN_IFFT_2(static void, float, float, float, *, store_float)
GEN_IFFT_4(static void, float, float, float, *, store_float, (float), add_float,
           sub_float)
GEN_IFFT_8(static void, float, float, float, *, store_float, (float), add_float,
           sub_float, mul_float)
GEN_IFFT_16(static void, float, float, float, *, store_float, (float),
            add_float, sub_float, mul_float)
GEN_IFFT_32(static void, float, float, float, *, store_float, (float),
            add_float, sub_float, mul_float)

void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
  aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
                  aom_ifft1d_2_float, simple_transpose, 1);
}

void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
  aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
                  aom_ifft1d_4_float, simple_transpose, 1);
}

void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
  aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
                  aom_ifft1d_8_float, simple_transpose, 1);
}

void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
  aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
                  aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
}

void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
  aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
                  aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
}

Coverage Report

Created: 2025-06-22 08:04

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2018, Alliance for Open Media. All rights reserved.
3		*
4		* This source code is subject to the terms of the BSD 2 Clause License and
5		* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6		* was not distributed with this source code in the LICENSE file, you can
7		* obtain it at www.aomedia.org/license/software. If the Alliance for Open
8		* Media Patent License 1.0 was not distributed with this source code in the
9		* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10		*/
11
12		#include "aom_dsp/aom_dsp_common.h"
13		#include "aom_dsp/fft_common.h"
14		#include "config/aom_dsp_rtcd.h"
15
16	0	static inline void simple_transpose(const float A, float B, int n) {
17	0	for (int y = 0; y < n; y++) {
18	0	for (int x = 0; x < n; x++) {
19	0	B[y * n + x] = A[x * n + y];
20	0	}
21	0	}
22	0	}
23
24		// The 1d transform is real to complex and packs the complex results in
25		// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
26		// components, followed by the n/2 - 1 imaginary components). After the
27		// transform is done on the rows, the first n/2 + 1 columns are real, and
28		// the remaining are the imaginary components. After the transform on the
29		// columns, the region of [0, n/2]x[0, n/2] contains the real part of
30		// fft of the real columns. The real part of the 2d fft also includes the
31		// imaginary part of transformed imaginary columns. This function assembles
32		// the correct outputs while putting the real and imaginary components
33		// next to each other.
34		static inline void unpack_2d_output(const float col_fft, float output,
35	0	int n) {
36	0	for (int y = 0; y <= n / 2; ++y) {
37	0	const int y2 = y + n / 2;
38	0	const int y_extra = y2 > n / 2 && y2 < n;
39
40	0	for (int x = 0; x <= n / 2; ++x) {
41	0	const int x2 = x + n / 2;
42	0	const int x_extra = x2 > n / 2 && x2 < n;
43	0	output[2 * (y * n + x)] =
44	0	col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
45	0	output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
46	0	(x_extra ? col_fft[y * n + x2] : 0);
47	0	if (y_extra) {
48	0	output[2 * ((n - y) * n + x)] =
49	0	col_fft[y * n + x] +
50	0	(x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
51	0	output[2 * ((n - y) * n + x) + 1] =
52	0	-(y_extra ? col_fft[y2 * n + x] : 0) +
53	0	(x_extra ? col_fft[y * n + x2] : 0);
54	0	}
55	0	}
56	0	}
57	0	}
58
59		void aom_fft_2d_gen(const float input, float temp, float *output, int n,
60		aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
61	0	aom_fft_unpack_func_t unpack, int vec_size) {
62	0	for (int x = 0; x < n; x += vec_size) {
63	0	tform(input + x, output + x, n);
64	0	}
65	0	transpose(output, temp, n);
66
67	0	for (int x = 0; x < n; x += vec_size) {
68	0	tform(temp + x, output + x, n);
69	0	}
70	0	transpose(output, temp, n);
71
72	0	unpack(temp, output, n);
73	0	}
74
75	0	static inline void store_float(float output, float input) { output = input; }
76	0	static inline float add_float(float a, float b) { return a + b; }
77	0	static inline float sub_float(float a, float b) { return a - b; }
78	0	static inline float mul_float(float a, float b) { return a * b; }
79
80		GEN_FFT_2(void, float, float, float, *, store_float)
81		GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
82		sub_float)
83		GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
84		sub_float, mul_float)
85		GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
86		sub_float, mul_float)
87		GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
88		sub_float, mul_float)
89
90	0	void aom_fft2x2_float_c(const float input, float temp, float *output) {
91	0	aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
92	0	unpack_2d_output, 1);
93	0	}
94
95	0	void aom_fft4x4_float_c(const float input, float temp, float *output) {
96	0	aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
97	0	unpack_2d_output, 1);
98	0	}
99
100	0	void aom_fft8x8_float_c(const float input, float temp, float *output) {
101	0	aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
102	0	unpack_2d_output, 1);
103	0	}
104
105	0	void aom_fft16x16_float_c(const float input, float temp, float *output) {
106	0	aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
107	0	unpack_2d_output, 1);
108	0	}
109
110	0	void aom_fft32x32_float_c(const float input, float temp, float *output) {
111	0	aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
112	0	unpack_2d_output, 1);
113	0	}
114
115		void aom_ifft_2d_gen(const float input, float temp, float *output, int n,
116		aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
117		aom_fft_1d_func_t ifft_multi,
118	0	aom_fft_transpose_func_t transpose, int vec_size) {
119		// Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
120		// and get real outputs.
121	0	for (int y = 0; y <= n / 2; ++y) {
122	0	output[y * n] = input[2 * y * n];
123	0	output[y * n + 1] = input[2 * (y * n + n / 2)];
124	0	}
125	0	for (int y = n / 2 + 1; y < n; ++y) {
126	0	output[y * n] = input[2 * (y - n / 2) * n + 1];
127	0	output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
128	0	}
129
130	0	for (int i = 0; i < 2; i += vec_size) {
131	0	ifft_multi(output + i, temp + i, n);
132	0	}
133
134		// For the other columns, since we don't have a full ifft for complex inputs
135		// we have to split them into the real and imaginary counterparts.
136		// Pack the real component, then the imaginary components.
137	0	for (int y = 0; y < n; ++y) {
138	0	for (int x = 1; x < n / 2; ++x) {
139	0	output[y * n + (x + 1)] = input[2 * (y * n + x)];
140	0	}
141	0	for (int x = 1; x < n / 2; ++x) {
142	0	output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
143	0	}
144	0	}
145	0	for (int y = 2; y < vec_size; y++) {
146	0	fft_single(output + y, temp + y, n);
147	0	}
148		// This is the part that can be sped up with SIMD
149	0	for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
150	0	fft_multi(output + y, temp + y, n);
151	0	}
152
153		// Put the 0 and n/2 th results in the correct place.
154	0	for (int x = 0; x < n; ++x) {
155	0	output[x] = temp[x * n];
156	0	output[(n / 2) * n + x] = temp[x * n + 1];
157	0	}
158		// This rearranges and transposes.
159	0	for (int y = 1; y < n / 2; ++y) {
160		// Fill in the real columns
161	0	for (int x = 0; x <= n / 2; ++x) {
162	0	output[x + y * n] =
163	0	temp[(y + 1) + x * n] +
164	0	((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
165	0	}
166	0	for (int x = n / 2 + 1; x < n; ++x) {
167	0	output[x + y * n] = temp[(y + 1) + (n - x) * n] -
168	0	temp[(y + n / 2) + ((n - x) + n / 2) * n];
169	0	}
170		// Fill in the imag columns
171	0	for (int x = 0; x <= n / 2; ++x) {
172	0	output[x + (y + n / 2) * n] =
173	0	temp[(y + n / 2) + x * n] -
174	0	((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
175	0	}
176	0	for (int x = n / 2 + 1; x < n; ++x) {
177	0	output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
178	0	temp[(y + n / 2) + (n - x) * n];
179	0	}
180	0	}
181	0	for (int y = 0; y < n; y += vec_size) {
182	0	ifft_multi(output + y, temp + y, n);
183	0	}
184	0	transpose(temp, output, n);
185	0	}
186
187		GEN_IFFT_2(static void, float, float, float, *, store_float)
188		GEN_IFFT_4(static void, float, float, float, *, store_float, (float), add_float,
189		sub_float)
190		GEN_IFFT_8(static void, float, float, float, *, store_float, (float), add_float,
191		sub_float, mul_float)
192		GEN_IFFT_16(static void, float, float, float, *, store_float, (float),
193		add_float, sub_float, mul_float)
194		GEN_IFFT_32(static void, float, float, float, *, store_float, (float),
195		add_float, sub_float, mul_float)
196
197	0	void aom_ifft2x2_float_c(const float input, float temp, float *output) {
198	0	aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
199	0	aom_ifft1d_2_float, simple_transpose, 1);
200	0	}
201
202	0	void aom_ifft4x4_float_c(const float input, float temp, float *output) {
203	0	aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
204	0	aom_ifft1d_4_float, simple_transpose, 1);
205	0	}
206
207	0	void aom_ifft8x8_float_c(const float input, float temp, float *output) {
208	0	aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
209	0	aom_ifft1d_8_float, simple_transpose, 1);
210	0	}
211
212	0	void aom_ifft16x16_float_c(const float input, float temp, float *output) {
213	0	aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
214	0	aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
215	0	}
216
217	0	void aom_ifft32x32_float_c(const float input, float temp, float *output) {
218	0	aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
219	0	aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
220	0	}