Coverage Report

Created: 2022-08-24 06:11

/src/aom/aom_dsp/fwd_txfm.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include "aom_dsp/txfm_common.h"
14
#include "config/aom_dsp_rtcd.h"
15
16
0
// Forward 4x4 transform (plain C version).
//
//   input  - 4x4 block of samples; vertically adjacent samples are `stride`
//            elements apart.
//   output - receives 16 coefficients, written with a row pitch of 4.
//   stride - row pitch of `input`, in elements.
//
// The work is done as two 1-D passes over four "columns" each. Pass 0 reads
// the columns of `input` (each sample scaled by 16) and stores every
// transformed column as one row of a scratch buffer, i.e. transposed. Pass 1
// applies the same 1-D transform to the columns of the scratch buffer and
// stores the results, transposed again, into `output`. Finally every
// coefficient is replaced by (coefficient + 1) >> 2.
void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
  // Holds the transposed result of pass 0 so pass 1 can read it back.
  tran_low_t scratch[4 * 4];
  const tran_low_t *src = NULL;  // Only dereferenced during pass 1.
  tran_low_t *dst = scratch;

  for (int pass = 0; pass < 2; ++pass) {
    for (int col = 0; col < 4; ++col) {
      tran_high_t v0, v1, v2, v3;  // canbe16

      // Gather the four samples of the current column.
      if (pass == 0) {
        v0 = input[0 * stride + col] * 16;
        v1 = input[1 * stride + col] * 16;
        v2 = input[2 * stride + col] * 16;
        v3 = input[3 * stride + col] * 16;
        // The very first scaled sample gets +1 whenever it is non-zero.
        if (col == 0 && v0 != 0) {
          ++v0;
        }
      } else {
        assert(src != NULL);
        v0 = src[0 * 4 + col];
        v1 = src[1 * 4 + col];
        v2 = src[2 * 4 + col];
        v3 = src[3 * 4 + col];
      }

      // Butterfly stage: sums and differences of mirrored samples.
      const tran_high_t outer_sum = v0 + v3;   // canbe16
      const tran_high_t inner_sum = v1 + v2;   // canbe16
      const tran_high_t inner_diff = v1 - v2;  // canbe16
      const tran_high_t outer_diff = v0 - v3;  // canbe16

      // Rotation stage; products need the wider type.
      const tran_high_t even0 = (outer_sum + inner_sum) * cospi_16_64;
      const tran_high_t even1 = (outer_sum - inner_sum) * cospi_16_64;
      const tran_high_t odd0 =
          inner_diff * cospi_24_64 + outer_diff * cospi_8_64;
      const tran_high_t odd1 =
          -inner_diff * cospi_8_64 + outer_diff * cospi_24_64;

      // Column `col` of the source becomes row `col` of the destination.
      tran_low_t *const row = dst + 4 * col;
      row[0] = (tran_low_t)fdct_round_shift(even0);
      row[2] = (tran_low_t)fdct_round_shift(even1);
      row[1] = (tran_low_t)fdct_round_shift(odd0);
      row[3] = (tran_low_t)fdct_round_shift(odd1);
    }

    // The next pass reads what was just produced and writes the final output.
    src = scratch;
    dst = output;
  }

  // Final scaling of all 16 coefficients.
  for (int k = 0; k < 4 * 4; ++k) {
    output[k] = (output[k] + 1) >> 2;
  }
}
77
78
0
void aom_fdct4x4_lp_c(const int16_t *input, int16_t *output, int stride) {
79
  // The 2D transform is done with two passes which are actually pretty
80
  // similar. In the first one, we transform the columns and transpose
81
  // the results. In the second one, we transform the rows. To achieve that,
82
  // as the first pass results are transposed, we transpose the columns (that
83
  // is the transposed rows) and transpose the results (so that it goes back
84
  // in normal/row positions).
85
  // We need an intermediate buffer between passes.
86
0
  int16_t intermediate[4 * 4];
87
0
  const int16_t *in_low = NULL;
88
0
  int16_t *out = intermediate;
89
  // Do the two transform/transpose passes
90
0
  for (int pass = 0; pass < 2; ++pass) {
91
0
    int32_t in_high[4];    // canbe16
92
0
    int32_t step[4];       // canbe16
93
0
    int32_t temp1, temp2;  // needs32
94
0
    for (int i = 0; i < 4; ++i) {
95
      // Load inputs.
96
0
      if (pass == 0) {
97
0
        in_high[0] = input[0 * stride] * 16;
98
0
        in_high[1] = input[1 * stride] * 16;
99
0
        in_high[2] = input[2 * stride] * 16;
100
0
        in_high[3] = input[3 * stride] * 16;
101
0
        if (i == 0 && in_high[0]) {
102
0
          ++in_high[0];
103
0
        }
104
0
      } else {
105
0
        assert(in_low != NULL);
106
0
        in_high[0] = in_low[0 * 4];
107
0
        in_high[1] = in_low[1 * 4];
108
0
        in_high[2] = in_low[2 * 4];
109
0
        in_high[3] = in_low[3 * 4];
110
0
        ++in_low;
111
0
      }
112
      // Transform.
113
0
      step[0] = in_high[0] + in_high[3];
114
0
      step[1] = in_high[1] + in_high[2];
115
0
      step[2] = in_high[1] - in_high[2];
116
0
      step[3] = in_high[0] - in_high[3];
117
0
      temp1 = (step[0] + step[1]) * (int32_t)cospi_16_64;
118
0
      temp2 = (step[0] - step[1]) * (int32_t)cospi_16_64;
119
0
      out[0] = (int16_t)fdct_round_shift(temp1);
120
0
      out[2] = (int16_t)fdct_round_shift(temp2);
121
0
      temp1 = step[2] * (int32_t)cospi_24_64 + step[3] * (int32_t)cospi_8_64;
122
0
      temp2 = -step[2] * (int32_t)cospi_8_64 + step[3] * (int32_t)cospi_24_64;
123
0
      out[1] = (int16_t)fdct_round_shift(temp1);
124
0
      out[3] = (int16_t)fdct_round_shift(temp2);
125
      // Do next column (which is a transposed row in second/horizontal pass)
126
0
      ++input;
127
0
      out += 4;
128
0
    }
129
    // Setup in/out for next pass.
130
0
    in_low = intermediate;
131
0
    out = output;
132
0
  }
133
134
0
  for (int i = 0; i < 4; ++i) {
135
0
    for (int j = 0; j < 4; ++j)
136
0
      output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
137
0
  }
138
0
}
139
140
0
// Forward 8x8 transform (plain C version).
//
//   input        - 8x8 block of samples; vertically adjacent samples are
//                  `stride` elements apart.
//   final_output - receives 64 coefficients, written with a row pitch of 8.
//   stride       - row pitch of `input`, in elements.
//
// Two 1-D passes are run over eight "columns" each. Pass 0 reads the columns
// of `input` (the first-stage sums/differences are scaled by 4) and writes
// each transformed column as one row of a scratch buffer. Pass 1 does the
// same from the scratch buffer into `final_output`. Afterwards every
// coefficient is divided by 2.
void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
  // Holds the transposed result of pass 0 so pass 1 can read it back.
  tran_low_t scratch[64];
  const tran_low_t *src = NULL;  // Only dereferenced during pass 1.
  tran_low_t *dst = scratch;

  for (int pass = 0; pass < 2; ++pass) {
    for (int col = 0; col < 8; ++col) {
      tran_high_t a0, a1, a2, a3, a4, a5, a6, a7;  // canbe16

      // Stage 1: sums and differences of samples mirrored about the middle.
      if (pass != 0) {
        const tran_low_t *const p = src + col;
        a0 = p[0 * 8] + p[7 * 8];
        a1 = p[1 * 8] + p[6 * 8];
        a2 = p[2 * 8] + p[5 * 8];
        a3 = p[3 * 8] + p[4 * 8];
        a4 = p[3 * 8] - p[4 * 8];
        a5 = p[2 * 8] - p[5 * 8];
        a6 = p[1 * 8] - p[6 * 8];
        a7 = p[0 * 8] - p[7 * 8];
      } else {
        const int16_t *const p = input + col;
        a0 = (p[0 * stride] + p[7 * stride]) * 4;
        a1 = (p[1 * stride] + p[6 * stride]) * 4;
        a2 = (p[2 * stride] + p[5 * stride]) * 4;
        a3 = (p[3 * stride] + p[4 * stride]) * 4;
        a4 = (p[3 * stride] - p[4 * stride]) * 4;
        a5 = (p[2 * stride] - p[5 * stride]) * 4;
        a6 = (p[1 * stride] - p[6 * stride]) * 4;
        a7 = (p[0 * stride] - p[7 * stride]) * 4;
      }

      // Column `col` of the source becomes row `col` of the destination.
      tran_low_t *const row = dst + 8 * col;

      // Even-indexed outputs: a 4-point transform of the four sums a0..a3.
      const tran_high_t e0 = a0 + a3;  // canbe16
      const tran_high_t e1 = a1 + a2;  // canbe16
      const tran_high_t e2 = a1 - a2;  // canbe16
      const tran_high_t e3 = a0 - a3;  // canbe16
      const tran_high_t even0 = (e0 + e1) * cospi_16_64;               // needs32
      const tran_high_t even4 = (e0 - e1) * cospi_16_64;               // needs32
      const tran_high_t even2 = e2 * cospi_24_64 + e3 * cospi_8_64;    // needs32
      const tran_high_t even6 = -e2 * cospi_8_64 + e3 * cospi_24_64;   // needs32
      row[0] = (tran_low_t)fdct_round_shift(even0);
      row[2] = (tran_low_t)fdct_round_shift(even2);
      row[4] = (tran_low_t)fdct_round_shift(even4);
      row[6] = (tran_low_t)fdct_round_shift(even6);

      // Stage 2: rotate the two middle differences and round them.
      const tran_high_t mid_diff = (a6 - a5) * cospi_16_64;  // needs32
      const tran_high_t mid_sum = (a6 + a5) * cospi_16_64;   // needs32
      const tran_high_t r_diff = fdct_round_shift(mid_diff);
      const tran_high_t r_sum = fdct_round_shift(mid_sum);

      // Stage 3: combine them with the outer differences.
      const tran_high_t o0 = a4 + r_diff;  // canbe16
      const tran_high_t o1 = a4 - r_diff;  // canbe16
      const tran_high_t o2 = a7 - r_sum;   // canbe16
      const tran_high_t o3 = a7 + r_sum;   // canbe16

      // Stage 4: final rotations producing the odd-indexed outputs.
      const tran_high_t odd1 = o0 * cospi_28_64 + o3 * cospi_4_64;
      const tran_high_t odd5 = o1 * cospi_12_64 + o2 * cospi_20_64;
      const tran_high_t odd3 = o2 * cospi_12_64 + o1 * -cospi_20_64;
      const tran_high_t odd7 = o3 * cospi_28_64 + o0 * -cospi_4_64;
      row[1] = (tran_low_t)fdct_round_shift(odd1);
      row[3] = (tran_low_t)fdct_round_shift(odd3);
      row[5] = (tran_low_t)fdct_round_shift(odd5);
      row[7] = (tran_low_t)fdct_round_shift(odd7);
    }

    // The next pass reads what was just produced and writes the final output.
    src = scratch;
    dst = final_output;
  }

  // Final scaling: halve all 64 coefficients.
  for (int k = 0; k < 64; ++k) {
    final_output[k] /= 2;
  }
}
223
224
#if CONFIG_AV1_HIGHBITDEPTH
225
// High-bitdepth entry point for the 8x8 forward transform. It has no logic of
// its own: all three arguments are handed unchanged to aom_fdct8x8_c().
void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
                          int stride) {
  aom_fdct8x8_c(input, final_output, stride);
}
229
#endif