Coverage Report

Created: 2022-08-24 06:15

/src/aom/aom_dsp/fft.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include "aom_dsp/aom_dsp_common.h"
13
#include "aom_dsp/fft_common.h"
14
15
0
static INLINE void simple_transpose(const float *A, float *B, int n) {
16
0
  for (int y = 0; y < n; y++) {
17
0
    for (int x = 0; x < n; x++) {
18
0
      B[y * n + x] = A[x * n + y];
19
0
    }
20
0
  }
21
0
}
22
23
// The 1d transform is real to complex and packs the complex results in
24
// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
25
// components, followed by the n/2 - 1 imaginary components). After the
26
// transform is done on the rows, the first n/2 + 1 columns are real, and
27
// the remaining are the imaginary components. After the transform on the
28
// columns, the region of [0, n/2]x[0, n/2] contains the real part of
29
// fft of the real columns. The real part of the 2d fft also includes the
30
// imaginary part of transformed imaginary columns. This function assembles
31
// the correct outputs while putting the real and imaginary components
32
// next to each other.
33
static INLINE void unpack_2d_output(const float *col_fft, float *output,
34
0
                                    int n) {
35
0
  for (int y = 0; y <= n / 2; ++y) {
36
0
    const int y2 = y + n / 2;
37
0
    const int y_extra = y2 > n / 2 && y2 < n;
38
39
0
    for (int x = 0; x <= n / 2; ++x) {
40
0
      const int x2 = x + n / 2;
41
0
      const int x_extra = x2 > n / 2 && x2 < n;
42
0
      output[2 * (y * n + x)] =
43
0
          col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
44
0
      output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
45
0
                                    (x_extra ? col_fft[y * n + x2] : 0);
46
0
      if (y_extra) {
47
0
        output[2 * ((n - y) * n + x)] =
48
0
            col_fft[y * n + x] +
49
0
            (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
50
0
        output[2 * ((n - y) * n + x) + 1] =
51
0
            -(y_extra ? col_fft[y2 * n + x] : 0) +
52
0
            (x_extra ? col_fft[y * n + x2] : 0);
53
0
      }
54
0
    }
55
0
  }
56
0
}
57
58
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
59
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
60
0
                    aom_fft_unpack_func_t unpack, int vec_size) {
61
0
  for (int x = 0; x < n; x += vec_size) {
62
0
    tform(input + x, output + x, n);
63
0
  }
64
0
  transpose(output, temp, n);
65
66
0
  for (int x = 0; x < n; x += vec_size) {
67
0
    tform(temp + x, output + x, n);
68
0
  }
69
0
  transpose(output, temp, n);
70
71
0
  unpack(temp, output, n);
72
0
}
73
74
0
static INLINE void store_float(float *output, float input) { *output = input; }
75
0
static INLINE float add_float(float a, float b) { return a + b; }
76
0
static INLINE float sub_float(float a, float b) { return a - b; }
77
0
static INLINE float mul_float(float a, float b) { return a * b; }
78
79
GEN_FFT_2(void, float, float, float, *, store_float);
80
GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
81
          sub_float);
82
GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
83
          sub_float, mul_float);
84
GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
85
           sub_float, mul_float);
86
GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
87
           sub_float, mul_float);
88
89
0
void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
90
0
  aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
91
0
                 unpack_2d_output, 1);
92
0
}
93
94
0
void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
95
0
  aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
96
0
                 unpack_2d_output, 1);
97
0
}
98
99
0
void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
100
0
  aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
101
0
                 unpack_2d_output, 1);
102
0
}
103
104
0
void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
105
0
  aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
106
0
                 unpack_2d_output, 1);
107
0
}
108
109
0
void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
110
0
  aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
111
0
                 unpack_2d_output, 1);
112
0
}
113
114
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
115
                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
116
                     aom_fft_1d_func_t ifft_multi,
117
0
                     aom_fft_transpose_func_t transpose, int vec_size) {
118
  // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
119
  // and get real outputs.
120
0
  for (int y = 0; y <= n / 2; ++y) {
121
0
    output[y * n] = input[2 * y * n];
122
0
    output[y * n + 1] = input[2 * (y * n + n / 2)];
123
0
  }
124
0
  for (int y = n / 2 + 1; y < n; ++y) {
125
0
    output[y * n] = input[2 * (y - n / 2) * n + 1];
126
0
    output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
127
0
  }
128
129
0
  for (int i = 0; i < 2; i += vec_size) {
130
0
    ifft_multi(output + i, temp + i, n);
131
0
  }
132
133
  // For the other columns, since we don't have a full ifft for complex inputs
134
  // we have to split them into the real and imaginary counterparts.
135
  // Pack the real component, then the imaginary components.
136
0
  for (int y = 0; y < n; ++y) {
137
0
    for (int x = 1; x < n / 2; ++x) {
138
0
      output[y * n + (x + 1)] = input[2 * (y * n + x)];
139
0
    }
140
0
    for (int x = 1; x < n / 2; ++x) {
141
0
      output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
142
0
    }
143
0
  }
144
0
  for (int y = 2; y < vec_size; y++) {
145
0
    fft_single(output + y, temp + y, n);
146
0
  }
147
  // This is the part that can be sped up with SIMD
148
0
  for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
149
0
    fft_multi(output + y, temp + y, n);
150
0
  }
151
152
  // Put the 0 and n/2 th results in the correct place.
153
0
  for (int x = 0; x < n; ++x) {
154
0
    output[x] = temp[x * n];
155
0
    output[(n / 2) * n + x] = temp[x * n + 1];
156
0
  }
157
  // This rearranges and transposes.
158
0
  for (int y = 1; y < n / 2; ++y) {
159
    // Fill in the real columns
160
0
    for (int x = 0; x <= n / 2; ++x) {
161
0
      output[x + y * n] =
162
0
          temp[(y + 1) + x * n] +
163
0
          ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
164
0
    }
165
0
    for (int x = n / 2 + 1; x < n; ++x) {
166
0
      output[x + y * n] = temp[(y + 1) + (n - x) * n] -
167
0
                          temp[(y + n / 2) + ((n - x) + n / 2) * n];
168
0
    }
169
    // Fill in the imag columns
170
0
    for (int x = 0; x <= n / 2; ++x) {
171
0
      output[x + (y + n / 2) * n] =
172
0
          temp[(y + n / 2) + x * n] -
173
0
          ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
174
0
    }
175
0
    for (int x = n / 2 + 1; x < n; ++x) {
176
0
      output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
177
0
                                    temp[(y + n / 2) + (n - x) * n];
178
0
    }
179
0
  }
180
0
  for (int y = 0; y < n; y += vec_size) {
181
0
    ifft_multi(output + y, temp + y, n);
182
0
  }
183
0
  transpose(temp, output, n);
184
0
}
185
186
GEN_IFFT_2(void, float, float, float, *, store_float);
187
GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float,
188
           sub_float);
189
GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float,
190
           sub_float, mul_float);
191
GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float,
192
            sub_float, mul_float);
193
GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float,
194
            sub_float, mul_float);
195
196
0
void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
197
0
  aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
198
0
                  aom_ifft1d_2_float, simple_transpose, 1);
199
0
}
200
201
0
void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
202
0
  aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
203
0
                  aom_ifft1d_4_float, simple_transpose, 1);
204
0
}
205
206
0
void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
207
0
  aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
208
0
                  aom_ifft1d_8_float, simple_transpose, 1);
209
0
}
210
211
0
void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
212
0
  aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
213
0
                  aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
214
0
}
215
216
0
void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
217
0
  aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
218
0
                  aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
219
0
}