/src/aom/aom_dsp/fwd_txfm.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include "aom_dsp/txfm_common.h"
#include "config/aom_dsp_rtcd.h"

void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
  // 4x4 forward 2D transform, computed as two nearly identical 1-D passes.
  //
  // Pass 0 walks the columns of `input` (vertically adjacent samples are
  // `stride` elements apart), multiplies each sample by 16 and writes every
  // transformed column as one row of `scratch`, i.e. transposed.
  // Pass 1 runs the same 1-D transform over the columns of `scratch` and
  // writes each result as one row of `output`, which undoes the transpose.
  //
  // input:  4x4 block of int16_t samples with row pitch `stride`.
  // output: 16 coefficients; every entry is overwritten.
  tran_low_t scratch[4 * 4];
  const tran_low_t *src = NULL;  // Only read during pass 1.
  tran_low_t *dst = scratch;
  int pass = 0;

  while (pass < 2) {
    for (int col = 0; col < 4; ++col) {
      tran_high_t a0, a1, a2, a3;  // The four samples of the current 1-D line.

      if (pass != 0) {
        // Second pass: read one column of the intermediate buffer.
        assert(src != NULL);
        a0 = src[0 * 4];
        a1 = src[1 * 4];
        a2 = src[2 * 4];
        a3 = src[3 * 4];
        ++src;
      } else {
        // First pass: read one column of the caller's block, scaled by 16.
        a0 = input[0 * stride] * 16;
        a1 = input[1 * stride] * 16;
        a2 = input[2 * stride] * 16;
        a3 = input[3 * stride] * 16;
        // The top sample of the very first column is bumped by one whenever
        // it is non-zero.
        if (col == 0 && a0 != 0) {
          a0 += 1;
        }
      }

      // Butterfly stage: mirrored sums and differences.
      const tran_high_t sum_outer = a0 + a3;
      const tran_high_t sum_inner = a1 + a2;
      const tran_high_t diff_inner = a1 - a2;
      const tran_high_t diff_outer = a0 - a3;

      // Even-indexed outputs.
      tran_high_t prod_a = (sum_outer + sum_inner) * cospi_16_64;
      tran_high_t prod_b = (sum_outer - sum_inner) * cospi_16_64;
      dst[0] = (tran_low_t)fdct_round_shift(prod_a);
      dst[2] = (tran_low_t)fdct_round_shift(prod_b);

      // Odd-indexed outputs.
      prod_a = diff_inner * cospi_24_64 + diff_outer * cospi_8_64;
      prod_b = -diff_inner * cospi_8_64 + diff_outer * cospi_24_64;
      dst[1] = (tran_low_t)fdct_round_shift(prod_a);
      dst[3] = (tran_low_t)fdct_round_shift(prod_b);

      // Advance to the next line. `input` is stepped in both passes but is
      // only dereferenced in pass 0.
      ++input;
      dst += 4;
    }

    // Re-point the source/destination for the second pass.
    src = scratch;
    dst = output;
    ++pass;
  }

  // Final scaling of all 16 coefficients: add 1, then shift right by 2.
  for (int k = 0; k < 4 * 4; ++k) {
    output[k] = (output[k] + 1) >> 2;
  }
}

void aom_fdct4x4_lp_c(const int16_t *input, int16_t *output, int stride) {
  // 4x4 forward 2D transform producing int16_t coefficients and using
  // int32_t intermediates. It runs as two 1-D passes:
  //   pass 0 - transform each column of `input` (rows are `stride` elements
  //            apart, samples multiplied by 16) and store it as a row of
  //            `scratch`, i.e. transposed;
  //   pass 1 - transform each column of `scratch` and store it as a row of
  //            `output`, which restores the normal orientation.
  int16_t scratch[4 * 4];
  const int16_t *src = NULL;  // Only read during pass 1.
  int16_t *dst = scratch;

  for (int pass = 0; pass < 2; ++pass) {
    const int first_pass = (pass == 0);

    for (int line = 0; line < 4; ++line) {
      int32_t v0, v1, v2, v3;  // The four samples of the current 1-D line.

      if (first_pass) {
        v0 = input[0 * stride] * 16;
        v1 = input[1 * stride] * 16;
        v2 = input[2 * stride] * 16;
        v3 = input[3 * stride] * 16;
        // The top sample of the very first column gets +1 when it is
        // non-zero.
        if (line == 0 && v0 != 0) {
          v0 += 1;
        }
      } else {
        assert(src != NULL);
        v0 = src[0 * 4];
        v1 = src[1 * 4];
        v2 = src[2 * 4];
        v3 = src[3 * 4];
        ++src;
      }

      // Mirrored sums and differences.
      const int32_t outer_sum = v0 + v3;
      const int32_t inner_sum = v1 + v2;
      const int32_t inner_diff = v1 - v2;
      const int32_t outer_diff = v0 - v3;

      // Even-indexed outputs. The cosine constants are cast to int32_t so
      // the products are formed in 32-bit arithmetic.
      int32_t acc0 = (outer_sum + inner_sum) * (int32_t)cospi_16_64;
      int32_t acc1 = (outer_sum - inner_sum) * (int32_t)cospi_16_64;
      dst[0] = (int16_t)fdct_round_shift(acc0);
      dst[2] = (int16_t)fdct_round_shift(acc1);

      // Odd-indexed outputs.
      acc0 = inner_diff * (int32_t)cospi_24_64 +
             outer_diff * (int32_t)cospi_8_64;
      acc1 = -inner_diff * (int32_t)cospi_8_64 +
             outer_diff * (int32_t)cospi_24_64;
      dst[1] = (int16_t)fdct_round_shift(acc0);
      dst[3] = (int16_t)fdct_round_shift(acc1);

      // Step to the next line. `input` advances in both passes but is only
      // dereferenced in the first one.
      ++input;
      dst += 4;
    }

    // Second pass reads the intermediate buffer and writes the result.
    src = scratch;
    dst = output;
  }

  // Final scaling of all 16 coefficients: add 1, then shift right by 2.
  for (int k = 0; k < 4 * 4; ++k) {
    output[k] = (output[k] + 1) >> 2;
  }
}

void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
  // 8x8 forward 2D transform done as two 1-D passes.
  // Pass 0 transforms each column of `input` (rows are `stride` elements
  // apart, sums/differences multiplied by 4) and stores it as a row of
  // `scratch`. Pass 1 transforms each column of `scratch` and stores it as a
  // row of `final_output`. All 64 results are halved at the end.
  tran_low_t scratch[64];
  tran_low_t *dst = scratch;
  const tran_low_t *src = NULL;  // Only read during pass 1.

  for (int pass = 0; pass < 2; ++pass) {
    for (int line = 0; line < 8; ++line) {
      // Stage 1: pair sample k with sample 7 - k.
      //   sum[k] = in[k] + in[7 - k],  dif[k] = in[k] - in[7 - k]
      tran_high_t sum[4];
      tran_high_t dif[4];
      if (pass == 0) {
        for (int k = 0; k < 4; ++k) {
          const int16_t near = input[k * stride];
          const int16_t far = input[(7 - k) * stride];
          sum[k] = (near + far) * 4;
          dif[k] = (near - far) * 4;
        }
        ++input;
      } else {
        for (int k = 0; k < 4; ++k) {
          const tran_low_t near = src[k * 8];
          const tran_low_t far = src[(7 - k) * 8];
          sum[k] = near + far;
          dif[k] = near - far;
        }
        ++src;
      }

      // Even-indexed outputs: a 4-point transform of the sums.
      const tran_high_t e0 = sum[0] + sum[3];
      const tran_high_t e1 = sum[1] + sum[2];
      const tran_high_t e2 = sum[1] - sum[2];
      const tran_high_t e3 = sum[0] - sum[3];
      tran_high_t p0 = (e0 + e1) * cospi_16_64;
      tran_high_t p1 = (e0 - e1) * cospi_16_64;
      tran_high_t p2 = e2 * cospi_24_64 + e3 * cospi_8_64;
      tran_high_t p3 = -e2 * cospi_8_64 + e3 * cospi_24_64;
      dst[0] = (tran_low_t)fdct_round_shift(p0);
      dst[2] = (tran_low_t)fdct_round_shift(p2);
      dst[4] = (tran_low_t)fdct_round_shift(p1);
      dst[6] = (tran_low_t)fdct_round_shift(p3);

      // Odd-indexed outputs.
      // Stage 2: rotate the two middle differences.
      p0 = (dif[1] - dif[2]) * cospi_16_64;
      p1 = (dif[1] + dif[2]) * cospi_16_64;
      const tran_high_t mid_lo = fdct_round_shift(p0);
      const tran_high_t mid_hi = fdct_round_shift(p1);

      // Stage 3: combine with the innermost and outermost differences.
      const tran_high_t o0 = dif[3] + mid_lo;
      const tran_high_t o1 = dif[3] - mid_lo;
      const tran_high_t o2 = dif[0] - mid_hi;
      const tran_high_t o3 = dif[0] + mid_hi;

      // Stage 4: final rotations.
      p0 = o0 * cospi_28_64 + o3 * cospi_4_64;
      p1 = o1 * cospi_12_64 + o2 * cospi_20_64;
      p2 = o2 * cospi_12_64 + o1 * -cospi_20_64;
      p3 = o3 * cospi_28_64 + o0 * -cospi_4_64;
      dst[1] = (tran_low_t)fdct_round_shift(p0);
      dst[3] = (tran_low_t)fdct_round_shift(p2);
      dst[5] = (tran_low_t)fdct_round_shift(p1);
      dst[7] = (tran_low_t)fdct_round_shift(p3);

      dst += 8;
    }

    // Second pass reads the intermediate buffer and writes the final result.
    src = scratch;
    dst = final_output;
  }

  // Halve every coefficient (integer division by 2).
  for (int k = 0; k < 64; ++k) {
    final_output[k] /= 2;
  }
}

#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth entry point for the 8x8 forward transform (only compiled when
// CONFIG_AV1_HIGHBITDEPTH is enabled). It has no logic of its own: all three
// arguments are forwarded unchanged to aom_fdct8x8_c().
void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
                          int stride) {
  aom_fdct8x8_c(input, final_output, stride);
}
#endif

Coverage Report

Created: 2022-08-24 06:11

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2016, Alliance for Open Media. All rights reserved
3		*
4		* This source code is subject to the terms of the BSD 2 Clause License and
5		* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6		* was not distributed with this source code in the LICENSE file, you can
7		* obtain it at www.aomedia.org/license/software. If the Alliance for Open
8		* Media Patent License 1.0 was not distributed with this source code in the
9		* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10		*/
11
12		#include <assert.h>
13		#include "aom_dsp/txfm_common.h"
14		#include "config/aom_dsp_rtcd.h"
15
16	0	void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
17		// The 2D transform is done with two passes which are actually pretty
18		// similar. In the first one, we transform the columns and transpose
19		// the results. In the second one, we transform the rows. To achieve that,
20		// as the first pass results are transposed, we transpose the columns (that
21		// is the transposed rows) and transpose the results (so that it goes back
22		// in normal/row positions).
23		// We need an intermediate buffer between passes.
24	0	tran_low_t intermediate[4 * 4];
25	0	const tran_low_t *in_low = NULL;
26	0	tran_low_t *out = intermediate;
27		// Do the two transform/transpose passes
28	0	for (int pass = 0; pass < 2; ++pass) {
29	0	tran_high_t in_high[4]; // canbe16
30	0	tran_high_t step[4]; // canbe16
31	0	tran_high_t temp1, temp2; // needs32
32	0	for (int i = 0; i < 4; ++i) {
33		// Load inputs.
34	0	if (pass == 0) {
35	0	in_high[0] = input[0 * stride] * 16;
36	0	in_high[1] = input[1 * stride] * 16;
37	0	in_high[2] = input[2 * stride] * 16;
38	0	in_high[3] = input[3 * stride] * 16;
39	0	if (i == 0 && in_high[0]) {
40	0	++in_high[0];
41	0	}
42	0	} else {
43	0	assert(in_low != NULL);
44	0	in_high[0] = in_low[0 * 4];
45	0	in_high[1] = in_low[1 * 4];
46	0	in_high[2] = in_low[2 * 4];
47	0	in_high[3] = in_low[3 * 4];
48	0	++in_low;
49	0	}
50		// Transform.
51	0	step[0] = in_high[0] + in_high[3];
52	0	step[1] = in_high[1] + in_high[2];
53	0	step[2] = in_high[1] - in_high[2];
54	0	step[3] = in_high[0] - in_high[3];
55	0	temp1 = (step[0] + step[1]) * cospi_16_64;
56	0	temp2 = (step[0] - step[1]) * cospi_16_64;
57	0	out[0] = (tran_low_t)fdct_round_shift(temp1);
58	0	out[2] = (tran_low_t)fdct_round_shift(temp2);
59	0	temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
60	0	temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
61	0	out[1] = (tran_low_t)fdct_round_shift(temp1);
62	0	out[3] = (tran_low_t)fdct_round_shift(temp2);
63		// Do next column (which is a transposed row in second/horizontal pass)
64	0	++input;
65	0	out += 4;
66	0	}
67		// Setup in/out for next pass.
68	0	in_low = intermediate;
69	0	out = output;
70	0	}
71
72	0	for (int i = 0; i < 4; ++i) {
73	0	for (int j = 0; j < 4; ++j)
74	0	output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
75	0	}
76	0	}
77
78	0	void aom_fdct4x4_lp_c(const int16_t *input, int16_t *output, int stride) {
79		// The 2D transform is done with two passes which are actually pretty
80		// similar. In the first one, we transform the columns and transpose
81		// the results. In the second one, we transform the rows. To achieve that,
82		// as the first pass results are transposed, we transpose the columns (that
83		// is the transposed rows) and transpose the results (so that it goes back
84		// in normal/row positions).
85		// We need an intermediate buffer between passes.
86	0	int16_t intermediate[4 * 4];
87	0	const int16_t *in_low = NULL;
88	0	int16_t *out = intermediate;
89		// Do the two transform/transpose passes
90	0	for (int pass = 0; pass < 2; ++pass) {
91	0	int32_t in_high[4]; // canbe16
92	0	int32_t step[4]; // canbe16
93	0	int32_t temp1, temp2; // needs32
94	0	for (int i = 0; i < 4; ++i) {
95		// Load inputs.
96	0	if (pass == 0) {
97	0	in_high[0] = input[0 * stride] * 16;
98	0	in_high[1] = input[1 * stride] * 16;
99	0	in_high[2] = input[2 * stride] * 16;
100	0	in_high[3] = input[3 * stride] * 16;
101	0	if (i == 0 && in_high[0]) {
102	0	++in_high[0];
103	0	}
104	0	} else {
105	0	assert(in_low != NULL);
106	0	in_high[0] = in_low[0 * 4];
107	0	in_high[1] = in_low[1 * 4];
108	0	in_high[2] = in_low[2 * 4];
109	0	in_high[3] = in_low[3 * 4];
110	0	++in_low;
111	0	}
112		// Transform.
113	0	step[0] = in_high[0] + in_high[3];
114	0	step[1] = in_high[1] + in_high[2];
115	0	step[2] = in_high[1] - in_high[2];
116	0	step[3] = in_high[0] - in_high[3];
117	0	temp1 = (step[0] + step[1]) * (int32_t)cospi_16_64;
118	0	temp2 = (step[0] - step[1]) * (int32_t)cospi_16_64;
119	0	out[0] = (int16_t)fdct_round_shift(temp1);
120	0	out[2] = (int16_t)fdct_round_shift(temp2);
121	0	temp1 = step[2] * (int32_t)cospi_24_64 + step[3] * (int32_t)cospi_8_64;
122	0	temp2 = -step[2] * (int32_t)cospi_8_64 + step[3] * (int32_t)cospi_24_64;
123	0	out[1] = (int16_t)fdct_round_shift(temp1);
124	0	out[3] = (int16_t)fdct_round_shift(temp2);
125		// Do next column (which is a transposed row in second/horizontal pass)
126	0	++input;
127	0	out += 4;
128	0	}
129		// Setup in/out for next pass.
130	0	in_low = intermediate;
131	0	out = output;
132	0	}
133
134	0	for (int i = 0; i < 4; ++i) {
135	0	for (int j = 0; j < 4; ++j)
136	0	output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
137	0	}
138	0	}
139
140	0	void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
141	0	int i, j;
142	0	tran_low_t intermediate[64];
143	0	int pass;
144	0	tran_low_t *output = intermediate;
145	0	const tran_low_t *in = NULL;
146
147		// Transform columns
148	0	for (pass = 0; pass < 2; ++pass) {
149	0	tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
150	0	tran_high_t t0, t1, t2, t3; // needs32
151	0	tran_high_t x0, x1, x2, x3; // canbe16
152
153	0	for (i = 0; i < 8; i++) {
154		// stage 1
155	0	if (pass == 0) {
156	0	s0 = (input[0 * stride] + input[7 * stride]) * 4;
157	0	s1 = (input[1 * stride] + input[6 * stride]) * 4;
158	0	s2 = (input[2 * stride] + input[5 * stride]) * 4;
159	0	s3 = (input[3 * stride] + input[4 * stride]) * 4;
160	0	s4 = (input[3 * stride] - input[4 * stride]) * 4;
161	0	s5 = (input[2 * stride] - input[5 * stride]) * 4;
162	0	s6 = (input[1 * stride] - input[6 * stride]) * 4;
163	0	s7 = (input[0 * stride] - input[7 * stride]) * 4;
164	0	++input;
165	0	} else {
166	0	s0 = in[0 * 8] + in[7 * 8];
167	0	s1 = in[1 * 8] + in[6 * 8];
168	0	s2 = in[2 * 8] + in[5 * 8];
169	0	s3 = in[3 * 8] + in[4 * 8];
170	0	s4 = in[3 * 8] - in[4 * 8];
171	0	s5 = in[2 * 8] - in[5 * 8];
172	0	s6 = in[1 * 8] - in[6 * 8];
173	0	s7 = in[0 * 8] - in[7 * 8];
174	0	++in;
175	0	}
176
177		// fdct4(step, step);
178	0	x0 = s0 + s3;
179	0	x1 = s1 + s2;
180	0	x2 = s1 - s2;
181	0	x3 = s0 - s3;
182	0	t0 = (x0 + x1) * cospi_16_64;
183	0	t1 = (x0 - x1) * cospi_16_64;
184	0	t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
185	0	t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
186	0	output[0] = (tran_low_t)fdct_round_shift(t0);
187	0	output[2] = (tran_low_t)fdct_round_shift(t2);
188	0	output[4] = (tran_low_t)fdct_round_shift(t1);
189	0	output[6] = (tran_low_t)fdct_round_shift(t3);
190
191		// Stage 2
192	0	t0 = (s6 - s5) * cospi_16_64;
193	0	t1 = (s6 + s5) * cospi_16_64;
194	0	t2 = fdct_round_shift(t0);
195	0	t3 = fdct_round_shift(t1);
196
197		// Stage 3
198	0	x0 = s4 + t2;
199	0	x1 = s4 - t2;
200	0	x2 = s7 - t3;
201	0	x3 = s7 + t3;
202
203		// Stage 4
204	0	t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
205	0	t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
206	0	t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
207	0	t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
208	0	output[1] = (tran_low_t)fdct_round_shift(t0);
209	0	output[3] = (tran_low_t)fdct_round_shift(t2);
210	0	output[5] = (tran_low_t)fdct_round_shift(t1);
211	0	output[7] = (tran_low_t)fdct_round_shift(t3);
212	0	output += 8;
213	0	}
214	0	in = intermediate;
215	0	output = final_output;
216	0	}
217
218		// Rows
219	0	for (i = 0; i < 8; ++i) {
220	0	for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2;
221	0	}
222	0	}
223
224		#if CONFIG_AV1_HIGHBITDEPTH
225		void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
226	0	int stride) {
227	0	aom_fdct8x8_c(input, final_output, stride);
228	0	}
229		#endif