/src/libvpx/vpx_dsp/inv_txfm.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <math.h> |
12 | | #include <stdlib.h> |
13 | | #include <string.h> |
14 | | |
15 | | #include "./vpx_dsp_rtcd.h" |
16 | | #include "vpx_dsp/inv_txfm.h" |
17 | | |
18 | 0 | void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
19 | | /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
20 | | 0.5 shifts per pixel. */ |
21 | 0 | int i; |
22 | 0 | tran_low_t output[16]; |
23 | 0 | tran_high_t a1, b1, c1, d1, e1; |
24 | 0 | const tran_low_t *ip = input; |
25 | 0 | tran_low_t *op = output; |
26 | |
|
27 | 0 | for (i = 0; i < 4; i++) { |
28 | 0 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
29 | 0 | c1 = ip[1] >> UNIT_QUANT_SHIFT; |
30 | 0 | d1 = ip[2] >> UNIT_QUANT_SHIFT; |
31 | 0 | b1 = ip[3] >> UNIT_QUANT_SHIFT; |
32 | 0 | a1 += c1; |
33 | 0 | d1 -= b1; |
34 | 0 | e1 = (a1 - d1) >> 1; |
35 | 0 | b1 = e1 - b1; |
36 | 0 | c1 = e1 - c1; |
37 | 0 | a1 -= b1; |
38 | 0 | d1 += c1; |
39 | 0 | op[0] = WRAPLOW(a1); |
40 | 0 | op[1] = WRAPLOW(b1); |
41 | 0 | op[2] = WRAPLOW(c1); |
42 | 0 | op[3] = WRAPLOW(d1); |
43 | 0 | ip += 4; |
44 | 0 | op += 4; |
45 | 0 | } |
46 | |
|
47 | 0 | ip = output; |
48 | 0 | for (i = 0; i < 4; i++) { |
49 | 0 | a1 = ip[4 * 0]; |
50 | 0 | c1 = ip[4 * 1]; |
51 | 0 | d1 = ip[4 * 2]; |
52 | 0 | b1 = ip[4 * 3]; |
53 | 0 | a1 += c1; |
54 | 0 | d1 -= b1; |
55 | 0 | e1 = (a1 - d1) >> 1; |
56 | 0 | b1 = e1 - b1; |
57 | 0 | c1 = e1 - c1; |
58 | 0 | a1 -= b1; |
59 | 0 | d1 += c1; |
60 | 0 | dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); |
61 | 0 | dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); |
62 | 0 | dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); |
63 | 0 | dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); |
64 | |
|
65 | 0 | ip++; |
66 | 0 | dest++; |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | 1.94M | void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
71 | 1.94M | int i; |
72 | 1.94M | tran_high_t a1, e1; |
73 | 1.94M | tran_low_t tmp[4]; |
74 | 1.94M | const tran_low_t *ip = input; |
75 | 1.94M | tran_low_t *op = tmp; |
76 | | |
77 | 1.94M | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
78 | 1.94M | e1 = a1 >> 1; |
79 | 1.94M | a1 -= e1; |
80 | 1.94M | op[0] = WRAPLOW(a1); |
81 | 1.94M | op[1] = op[2] = op[3] = WRAPLOW(e1); |
82 | | |
83 | 1.94M | ip = tmp; |
84 | 9.71M | for (i = 0; i < 4; i++) { |
85 | 7.76M | e1 = ip[0] >> 1; |
86 | 7.76M | a1 = ip[0] - e1; |
87 | 7.76M | dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); |
88 | 7.76M | dest[stride * 1] = clip_pixel_add(dest[stride * 1], e1); |
89 | 7.76M | dest[stride * 2] = clip_pixel_add(dest[stride * 2], e1); |
90 | 7.76M | dest[stride * 3] = clip_pixel_add(dest[stride * 3], e1); |
91 | 7.76M | ip++; |
92 | 7.76M | dest++; |
93 | 7.76M | } |
94 | 1.94M | } |
95 | | |
96 | 0 | void iadst4_c(const tran_low_t *input, tran_low_t *output) { |
97 | 0 | tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
98 | 0 | tran_low_t x0 = input[0]; |
99 | 0 | tran_low_t x1 = input[1]; |
100 | 0 | tran_low_t x2 = input[2]; |
101 | 0 | tran_low_t x3 = input[3]; |
102 | |
|
103 | 0 | if (!(x0 | x1 | x2 | x3)) { |
104 | 0 | memset(output, 0, 4 * sizeof(*output)); |
105 | 0 | return; |
106 | 0 | } |
107 | | |
108 | | // 32-bit result is enough for the following multiplications. |
109 | 0 | s0 = sinpi_1_9 * x0; |
110 | 0 | s1 = sinpi_2_9 * x0; |
111 | 0 | s2 = sinpi_3_9 * x1; |
112 | 0 | s3 = sinpi_4_9 * x2; |
113 | 0 | s4 = sinpi_1_9 * x2; |
114 | 0 | s5 = sinpi_2_9 * x3; |
115 | 0 | s6 = sinpi_4_9 * x3; |
116 | 0 | s7 = WRAPLOW(x0 - x2 + x3); |
117 | |
|
118 | 0 | s0 = s0 + s3 + s5; |
119 | 0 | s1 = s1 - s4 - s6; |
120 | 0 | s3 = s2; |
121 | 0 | s2 = sinpi_3_9 * s7; |
122 | | |
123 | | // 1-D transform scaling factor is sqrt(2). |
124 | | // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
125 | | // + 1b (addition) = 29b. |
126 | | // Hence the output bit depth is 15b. |
127 | 0 | output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); |
128 | 0 | output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); |
129 | 0 | output[2] = WRAPLOW(dct_const_round_shift(s2)); |
130 | 0 | output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); |
131 | 0 | } |
132 | | |
133 | 0 | void idct4_c(const tran_low_t *input, tran_low_t *output) { |
134 | 0 | int16_t step[4]; |
135 | 0 | tran_high_t temp1, temp2; |
136 | | |
137 | | // stage 1 |
138 | 0 | temp1 = ((int16_t)input[0] + (int16_t)input[2]) * cospi_16_64; |
139 | 0 | temp2 = ((int16_t)input[0] - (int16_t)input[2]) * cospi_16_64; |
140 | 0 | step[0] = WRAPLOW(dct_const_round_shift(temp1)); |
141 | 0 | step[1] = WRAPLOW(dct_const_round_shift(temp2)); |
142 | 0 | temp1 = (int16_t)input[1] * cospi_24_64 - (int16_t)input[3] * cospi_8_64; |
143 | 0 | temp2 = (int16_t)input[1] * cospi_8_64 + (int16_t)input[3] * cospi_24_64; |
144 | 0 | step[2] = WRAPLOW(dct_const_round_shift(temp1)); |
145 | 0 | step[3] = WRAPLOW(dct_const_round_shift(temp2)); |
146 | | |
147 | | // stage 2 |
148 | 0 | output[0] = WRAPLOW(step[0] + step[3]); |
149 | 0 | output[1] = WRAPLOW(step[1] + step[2]); |
150 | 0 | output[2] = WRAPLOW(step[1] - step[2]); |
151 | 0 | output[3] = WRAPLOW(step[0] - step[3]); |
152 | 0 | } |
153 | | |
154 | 0 | void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
155 | 0 | int i, j; |
156 | 0 | tran_low_t out[4 * 4]; |
157 | 0 | tran_low_t *outptr = out; |
158 | 0 | tran_low_t temp_in[4], temp_out[4]; |
159 | | |
160 | | // Rows |
161 | 0 | for (i = 0; i < 4; ++i) { |
162 | 0 | idct4_c(input, outptr); |
163 | 0 | input += 4; |
164 | 0 | outptr += 4; |
165 | 0 | } |
166 | | |
167 | | // Columns |
168 | 0 | for (i = 0; i < 4; ++i) { |
169 | 0 | for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; |
170 | 0 | idct4_c(temp_in, temp_out); |
171 | 0 | for (j = 0; j < 4; ++j) { |
172 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
173 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 4)); |
174 | 0 | } |
175 | 0 | } |
176 | 0 | } |
177 | | |
178 | 0 | void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
179 | 0 | int i; |
180 | 0 | tran_high_t a1; |
181 | 0 | tran_low_t out = |
182 | 0 | WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); |
183 | |
|
184 | 0 | out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); |
185 | 0 | a1 = ROUND_POWER_OF_TWO(out, 4); |
186 | |
|
187 | 0 | for (i = 0; i < 4; i++) { |
188 | 0 | dest[0] = clip_pixel_add(dest[0], a1); |
189 | 0 | dest[1] = clip_pixel_add(dest[1], a1); |
190 | 0 | dest[2] = clip_pixel_add(dest[2], a1); |
191 | 0 | dest[3] = clip_pixel_add(dest[3], a1); |
192 | 0 | dest += stride; |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | 0 | void iadst8_c(const tran_low_t *input, tran_low_t *output) { |
197 | 0 | int s0, s1, s2, s3, s4, s5, s6, s7; |
198 | 0 | tran_high_t x0 = input[7]; |
199 | 0 | tran_high_t x1 = input[0]; |
200 | 0 | tran_high_t x2 = input[5]; |
201 | 0 | tran_high_t x3 = input[2]; |
202 | 0 | tran_high_t x4 = input[3]; |
203 | 0 | tran_high_t x5 = input[4]; |
204 | 0 | tran_high_t x6 = input[1]; |
205 | 0 | tran_high_t x7 = input[6]; |
206 | |
|
207 | 0 | if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
208 | 0 | memset(output, 0, 8 * sizeof(*output)); |
209 | 0 | return; |
210 | 0 | } |
211 | | |
212 | | // stage 1 |
213 | 0 | s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); |
214 | 0 | s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); |
215 | 0 | s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); |
216 | 0 | s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); |
217 | 0 | s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); |
218 | 0 | s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); |
219 | 0 | s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); |
220 | 0 | s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); |
221 | |
|
222 | 0 | x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); |
223 | 0 | x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); |
224 | 0 | x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); |
225 | 0 | x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); |
226 | 0 | x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); |
227 | 0 | x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); |
228 | 0 | x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); |
229 | 0 | x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); |
230 | | |
231 | | // stage 2 |
232 | 0 | s0 = (int)x0; |
233 | 0 | s1 = (int)x1; |
234 | 0 | s2 = (int)x2; |
235 | 0 | s3 = (int)x3; |
236 | 0 | s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); |
237 | 0 | s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); |
238 | 0 | s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); |
239 | 0 | s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); |
240 | |
|
241 | 0 | x0 = WRAPLOW(s0 + s2); |
242 | 0 | x1 = WRAPLOW(s1 + s3); |
243 | 0 | x2 = WRAPLOW(s0 - s2); |
244 | 0 | x3 = WRAPLOW(s1 - s3); |
245 | 0 | x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); |
246 | 0 | x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); |
247 | 0 | x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); |
248 | 0 | x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); |
249 | | |
250 | | // stage 3 |
251 | 0 | s2 = (int)(cospi_16_64 * (x2 + x3)); |
252 | 0 | s3 = (int)(cospi_16_64 * (x2 - x3)); |
253 | 0 | s6 = (int)(cospi_16_64 * (x6 + x7)); |
254 | 0 | s7 = (int)(cospi_16_64 * (x6 - x7)); |
255 | |
|
256 | 0 | x2 = WRAPLOW(dct_const_round_shift(s2)); |
257 | 0 | x3 = WRAPLOW(dct_const_round_shift(s3)); |
258 | 0 | x6 = WRAPLOW(dct_const_round_shift(s6)); |
259 | 0 | x7 = WRAPLOW(dct_const_round_shift(s7)); |
260 | |
|
261 | 0 | output[0] = WRAPLOW(x0); |
262 | 0 | output[1] = WRAPLOW(-x4); |
263 | 0 | output[2] = WRAPLOW(x6); |
264 | 0 | output[3] = WRAPLOW(-x2); |
265 | 0 | output[4] = WRAPLOW(x3); |
266 | 0 | output[5] = WRAPLOW(-x7); |
267 | 0 | output[6] = WRAPLOW(x5); |
268 | 0 | output[7] = WRAPLOW(-x1); |
269 | 0 | } |
270 | | |
271 | 0 | void idct8_c(const tran_low_t *input, tran_low_t *output) { |
272 | 0 | int16_t step1[8], step2[8]; |
273 | 0 | tran_high_t temp1, temp2; |
274 | | |
275 | | // stage 1 |
276 | 0 | step1[0] = (int16_t)input[0]; |
277 | 0 | step1[2] = (int16_t)input[4]; |
278 | 0 | step1[1] = (int16_t)input[2]; |
279 | 0 | step1[3] = (int16_t)input[6]; |
280 | 0 | temp1 = (int16_t)input[1] * cospi_28_64 - (int16_t)input[7] * cospi_4_64; |
281 | 0 | temp2 = (int16_t)input[1] * cospi_4_64 + (int16_t)input[7] * cospi_28_64; |
282 | 0 | step1[4] = WRAPLOW(dct_const_round_shift(temp1)); |
283 | 0 | step1[7] = WRAPLOW(dct_const_round_shift(temp2)); |
284 | 0 | temp1 = (int16_t)input[5] * cospi_12_64 - (int16_t)input[3] * cospi_20_64; |
285 | 0 | temp2 = (int16_t)input[5] * cospi_20_64 + (int16_t)input[3] * cospi_12_64; |
286 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
287 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
288 | | |
289 | | // stage 2 |
290 | 0 | temp1 = (step1[0] + step1[2]) * cospi_16_64; |
291 | 0 | temp2 = (step1[0] - step1[2]) * cospi_16_64; |
292 | 0 | step2[0] = WRAPLOW(dct_const_round_shift(temp1)); |
293 | 0 | step2[1] = WRAPLOW(dct_const_round_shift(temp2)); |
294 | 0 | temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; |
295 | 0 | temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; |
296 | 0 | step2[2] = WRAPLOW(dct_const_round_shift(temp1)); |
297 | 0 | step2[3] = WRAPLOW(dct_const_round_shift(temp2)); |
298 | 0 | step2[4] = WRAPLOW(step1[4] + step1[5]); |
299 | 0 | step2[5] = WRAPLOW(step1[4] - step1[5]); |
300 | 0 | step2[6] = WRAPLOW(-step1[6] + step1[7]); |
301 | 0 | step2[7] = WRAPLOW(step1[6] + step1[7]); |
302 | | |
303 | | // stage 3 |
304 | 0 | step1[0] = WRAPLOW(step2[0] + step2[3]); |
305 | 0 | step1[1] = WRAPLOW(step2[1] + step2[2]); |
306 | 0 | step1[2] = WRAPLOW(step2[1] - step2[2]); |
307 | 0 | step1[3] = WRAPLOW(step2[0] - step2[3]); |
308 | 0 | step1[4] = step2[4]; |
309 | 0 | temp1 = (step2[6] - step2[5]) * cospi_16_64; |
310 | 0 | temp2 = (step2[5] + step2[6]) * cospi_16_64; |
311 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
312 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
313 | 0 | step1[7] = step2[7]; |
314 | | |
315 | | // stage 4 |
316 | 0 | output[0] = WRAPLOW(step1[0] + step1[7]); |
317 | 0 | output[1] = WRAPLOW(step1[1] + step1[6]); |
318 | 0 | output[2] = WRAPLOW(step1[2] + step1[5]); |
319 | 0 | output[3] = WRAPLOW(step1[3] + step1[4]); |
320 | 0 | output[4] = WRAPLOW(step1[3] - step1[4]); |
321 | 0 | output[5] = WRAPLOW(step1[2] - step1[5]); |
322 | 0 | output[6] = WRAPLOW(step1[1] - step1[6]); |
323 | 0 | output[7] = WRAPLOW(step1[0] - step1[7]); |
324 | 0 | } |
325 | | |
326 | 0 | void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
327 | 0 | int i, j; |
328 | 0 | tran_low_t out[8 * 8]; |
329 | 0 | tran_low_t *outptr = out; |
330 | 0 | tran_low_t temp_in[8], temp_out[8]; |
331 | | |
332 | | // First transform rows |
333 | 0 | for (i = 0; i < 8; ++i) { |
334 | 0 | idct8_c(input, outptr); |
335 | 0 | input += 8; |
336 | 0 | outptr += 8; |
337 | 0 | } |
338 | | |
339 | | // Then transform columns |
340 | 0 | for (i = 0; i < 8; ++i) { |
341 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
342 | 0 | idct8_c(temp_in, temp_out); |
343 | 0 | for (j = 0; j < 8; ++j) { |
344 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
345 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 5)); |
346 | 0 | } |
347 | 0 | } |
348 | 0 | } |
349 | | |
350 | 0 | void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
351 | 0 | int i, j; |
352 | 0 | tran_low_t out[8 * 8] = { 0 }; |
353 | 0 | tran_low_t *outptr = out; |
354 | 0 | tran_low_t temp_in[8], temp_out[8]; |
355 | | |
356 | | // First transform rows |
357 | | // Only first 4 row has non-zero coefs |
358 | 0 | for (i = 0; i < 4; ++i) { |
359 | 0 | idct8_c(input, outptr); |
360 | 0 | input += 8; |
361 | 0 | outptr += 8; |
362 | 0 | } |
363 | | |
364 | | // Then transform columns |
365 | 0 | for (i = 0; i < 8; ++i) { |
366 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
367 | 0 | idct8_c(temp_in, temp_out); |
368 | 0 | for (j = 0; j < 8; ++j) { |
369 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
370 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 5)); |
371 | 0 | } |
372 | 0 | } |
373 | 0 | } |
374 | | |
375 | 0 | void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
376 | 0 | int i, j; |
377 | 0 | tran_high_t a1; |
378 | 0 | tran_low_t out = |
379 | 0 | WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); |
380 | |
|
381 | 0 | out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); |
382 | 0 | a1 = ROUND_POWER_OF_TWO(out, 5); |
383 | 0 | for (j = 0; j < 8; ++j) { |
384 | 0 | for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1); |
385 | 0 | dest += stride; |
386 | 0 | } |
387 | 0 | } |
388 | | |
389 | 0 | void iadst16_c(const tran_low_t *input, tran_low_t *output) { |
390 | 0 | tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
391 | 0 | tran_high_t s9, s10, s11, s12, s13, s14, s15; |
392 | 0 | tran_high_t x0 = input[15]; |
393 | 0 | tran_high_t x1 = input[0]; |
394 | 0 | tran_high_t x2 = input[13]; |
395 | 0 | tran_high_t x3 = input[2]; |
396 | 0 | tran_high_t x4 = input[11]; |
397 | 0 | tran_high_t x5 = input[4]; |
398 | 0 | tran_high_t x6 = input[9]; |
399 | 0 | tran_high_t x7 = input[6]; |
400 | 0 | tran_high_t x8 = input[7]; |
401 | 0 | tran_high_t x9 = input[8]; |
402 | 0 | tran_high_t x10 = input[5]; |
403 | 0 | tran_high_t x11 = input[10]; |
404 | 0 | tran_high_t x12 = input[3]; |
405 | 0 | tran_high_t x13 = input[12]; |
406 | 0 | tran_high_t x14 = input[1]; |
407 | 0 | tran_high_t x15 = input[14]; |
408 | |
|
409 | 0 | if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | |
410 | 0 | x13 | x14 | x15)) { |
411 | 0 | memset(output, 0, 16 * sizeof(*output)); |
412 | 0 | return; |
413 | 0 | } |
414 | | |
415 | | // stage 1 |
416 | 0 | s0 = x0 * cospi_1_64 + x1 * cospi_31_64; |
417 | 0 | s1 = x0 * cospi_31_64 - x1 * cospi_1_64; |
418 | 0 | s2 = x2 * cospi_5_64 + x3 * cospi_27_64; |
419 | 0 | s3 = x2 * cospi_27_64 - x3 * cospi_5_64; |
420 | 0 | s4 = x4 * cospi_9_64 + x5 * cospi_23_64; |
421 | 0 | s5 = x4 * cospi_23_64 - x5 * cospi_9_64; |
422 | 0 | s6 = x6 * cospi_13_64 + x7 * cospi_19_64; |
423 | 0 | s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
424 | 0 | s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
425 | 0 | s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
426 | 0 | s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
427 | 0 | s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
428 | 0 | s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
429 | 0 | s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
430 | 0 | s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
431 | 0 | s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
432 | |
|
433 | 0 | x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); |
434 | 0 | x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); |
435 | 0 | x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); |
436 | 0 | x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); |
437 | 0 | x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); |
438 | 0 | x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); |
439 | 0 | x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); |
440 | 0 | x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); |
441 | 0 | x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); |
442 | 0 | x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); |
443 | 0 | x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); |
444 | 0 | x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); |
445 | 0 | x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); |
446 | 0 | x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); |
447 | 0 | x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); |
448 | 0 | x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); |
449 | | |
450 | | // stage 2 |
451 | 0 | s0 = x0; |
452 | 0 | s1 = x1; |
453 | 0 | s2 = x2; |
454 | 0 | s3 = x3; |
455 | 0 | s4 = x4; |
456 | 0 | s5 = x5; |
457 | 0 | s6 = x6; |
458 | 0 | s7 = x7; |
459 | 0 | s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
460 | 0 | s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
461 | 0 | s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
462 | 0 | s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
463 | 0 | s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; |
464 | 0 | s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
465 | 0 | s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; |
466 | 0 | s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
467 | |
|
468 | 0 | x0 = WRAPLOW(s0 + s4); |
469 | 0 | x1 = WRAPLOW(s1 + s5); |
470 | 0 | x2 = WRAPLOW(s2 + s6); |
471 | 0 | x3 = WRAPLOW(s3 + s7); |
472 | 0 | x4 = WRAPLOW(s0 - s4); |
473 | 0 | x5 = WRAPLOW(s1 - s5); |
474 | 0 | x6 = WRAPLOW(s2 - s6); |
475 | 0 | x7 = WRAPLOW(s3 - s7); |
476 | 0 | x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); |
477 | 0 | x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); |
478 | 0 | x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); |
479 | 0 | x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); |
480 | 0 | x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); |
481 | 0 | x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); |
482 | 0 | x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); |
483 | 0 | x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); |
484 | | |
485 | | // stage 3 |
486 | 0 | s0 = x0; |
487 | 0 | s1 = x1; |
488 | 0 | s2 = x2; |
489 | 0 | s3 = x3; |
490 | 0 | s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
491 | 0 | s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
492 | 0 | s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; |
493 | 0 | s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
494 | 0 | s8 = x8; |
495 | 0 | s9 = x9; |
496 | 0 | s10 = x10; |
497 | 0 | s11 = x11; |
498 | 0 | s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
499 | 0 | s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
500 | 0 | s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; |
501 | 0 | s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
502 | |
|
503 | 0 | x0 = WRAPLOW(s0 + s2); |
504 | 0 | x1 = WRAPLOW(s1 + s3); |
505 | 0 | x2 = WRAPLOW(s0 - s2); |
506 | 0 | x3 = WRAPLOW(s1 - s3); |
507 | 0 | x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); |
508 | 0 | x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); |
509 | 0 | x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); |
510 | 0 | x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); |
511 | 0 | x8 = WRAPLOW(s8 + s10); |
512 | 0 | x9 = WRAPLOW(s9 + s11); |
513 | 0 | x10 = WRAPLOW(s8 - s10); |
514 | 0 | x11 = WRAPLOW(s9 - s11); |
515 | 0 | x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); |
516 | 0 | x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); |
517 | 0 | x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); |
518 | 0 | x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); |
519 | | |
520 | | // stage 4 |
521 | 0 | s2 = (-cospi_16_64) * (x2 + x3); |
522 | 0 | s3 = cospi_16_64 * (x2 - x3); |
523 | 0 | s6 = cospi_16_64 * (x6 + x7); |
524 | 0 | s7 = cospi_16_64 * (-x6 + x7); |
525 | 0 | s10 = cospi_16_64 * (x10 + x11); |
526 | 0 | s11 = cospi_16_64 * (-x10 + x11); |
527 | 0 | s14 = (-cospi_16_64) * (x14 + x15); |
528 | 0 | s15 = cospi_16_64 * (x14 - x15); |
529 | |
|
530 | 0 | x2 = WRAPLOW(dct_const_round_shift(s2)); |
531 | 0 | x3 = WRAPLOW(dct_const_round_shift(s3)); |
532 | 0 | x6 = WRAPLOW(dct_const_round_shift(s6)); |
533 | 0 | x7 = WRAPLOW(dct_const_round_shift(s7)); |
534 | 0 | x10 = WRAPLOW(dct_const_round_shift(s10)); |
535 | 0 | x11 = WRAPLOW(dct_const_round_shift(s11)); |
536 | 0 | x14 = WRAPLOW(dct_const_round_shift(s14)); |
537 | 0 | x15 = WRAPLOW(dct_const_round_shift(s15)); |
538 | |
|
539 | 0 | output[0] = WRAPLOW(x0); |
540 | 0 | output[1] = WRAPLOW(-x8); |
541 | 0 | output[2] = WRAPLOW(x12); |
542 | 0 | output[3] = WRAPLOW(-x4); |
543 | 0 | output[4] = WRAPLOW(x6); |
544 | 0 | output[5] = WRAPLOW(x14); |
545 | 0 | output[6] = WRAPLOW(x10); |
546 | 0 | output[7] = WRAPLOW(x2); |
547 | 0 | output[8] = WRAPLOW(x3); |
548 | 0 | output[9] = WRAPLOW(x11); |
549 | 0 | output[10] = WRAPLOW(x15); |
550 | 0 | output[11] = WRAPLOW(x7); |
551 | 0 | output[12] = WRAPLOW(x5); |
552 | 0 | output[13] = WRAPLOW(-x13); |
553 | 0 | output[14] = WRAPLOW(x9); |
554 | 0 | output[15] = WRAPLOW(-x1); |
555 | 0 | } |
556 | | |
557 | 0 | void idct16_c(const tran_low_t *input, tran_low_t *output) { |
558 | 0 | int16_t step1[16], step2[16]; |
559 | 0 | tran_high_t temp1, temp2; |
560 | | |
561 | | // stage 1 |
562 | 0 | step1[0] = (int16_t)input[0 / 2]; |
563 | 0 | step1[1] = (int16_t)input[16 / 2]; |
564 | 0 | step1[2] = (int16_t)input[8 / 2]; |
565 | 0 | step1[3] = (int16_t)input[24 / 2]; |
566 | 0 | step1[4] = (int16_t)input[4 / 2]; |
567 | 0 | step1[5] = (int16_t)input[20 / 2]; |
568 | 0 | step1[6] = (int16_t)input[12 / 2]; |
569 | 0 | step1[7] = (int16_t)input[28 / 2]; |
570 | 0 | step1[8] = (int16_t)input[2 / 2]; |
571 | 0 | step1[9] = (int16_t)input[18 / 2]; |
572 | 0 | step1[10] = (int16_t)input[10 / 2]; |
573 | 0 | step1[11] = (int16_t)input[26 / 2]; |
574 | 0 | step1[12] = (int16_t)input[6 / 2]; |
575 | 0 | step1[13] = (int16_t)input[22 / 2]; |
576 | 0 | step1[14] = (int16_t)input[14 / 2]; |
577 | 0 | step1[15] = (int16_t)input[30 / 2]; |
578 | | |
579 | | // stage 2 |
580 | 0 | step2[0] = step1[0]; |
581 | 0 | step2[1] = step1[1]; |
582 | 0 | step2[2] = step1[2]; |
583 | 0 | step2[3] = step1[3]; |
584 | 0 | step2[4] = step1[4]; |
585 | 0 | step2[5] = step1[5]; |
586 | 0 | step2[6] = step1[6]; |
587 | 0 | step2[7] = step1[7]; |
588 | |
|
589 | 0 | temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
590 | 0 | temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
591 | 0 | step2[8] = WRAPLOW(dct_const_round_shift(temp1)); |
592 | 0 | step2[15] = WRAPLOW(dct_const_round_shift(temp2)); |
593 | |
|
594 | 0 | temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
595 | 0 | temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
596 | 0 | step2[9] = WRAPLOW(dct_const_round_shift(temp1)); |
597 | 0 | step2[14] = WRAPLOW(dct_const_round_shift(temp2)); |
598 | |
|
599 | 0 | temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
600 | 0 | temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
601 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
602 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
603 | |
|
604 | 0 | temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
605 | 0 | temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
606 | 0 | step2[11] = WRAPLOW(dct_const_round_shift(temp1)); |
607 | 0 | step2[12] = WRAPLOW(dct_const_round_shift(temp2)); |
608 | | |
609 | | // stage 3 |
610 | 0 | step1[0] = step2[0]; |
611 | 0 | step1[1] = step2[1]; |
612 | 0 | step1[2] = step2[2]; |
613 | 0 | step1[3] = step2[3]; |
614 | |
|
615 | 0 | temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
616 | 0 | temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
617 | 0 | step1[4] = WRAPLOW(dct_const_round_shift(temp1)); |
618 | 0 | step1[7] = WRAPLOW(dct_const_round_shift(temp2)); |
619 | 0 | temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
620 | 0 | temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
621 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
622 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
623 | |
|
624 | 0 | step1[8] = WRAPLOW(step2[8] + step2[9]); |
625 | 0 | step1[9] = WRAPLOW(step2[8] - step2[9]); |
626 | 0 | step1[10] = WRAPLOW(-step2[10] + step2[11]); |
627 | 0 | step1[11] = WRAPLOW(step2[10] + step2[11]); |
628 | 0 | step1[12] = WRAPLOW(step2[12] + step2[13]); |
629 | 0 | step1[13] = WRAPLOW(step2[12] - step2[13]); |
630 | 0 | step1[14] = WRAPLOW(-step2[14] + step2[15]); |
631 | 0 | step1[15] = WRAPLOW(step2[14] + step2[15]); |
632 | | |
633 | | // stage 4 |
634 | 0 | temp1 = (step1[0] + step1[1]) * cospi_16_64; |
635 | 0 | temp2 = (step1[0] - step1[1]) * cospi_16_64; |
636 | 0 | step2[0] = WRAPLOW(dct_const_round_shift(temp1)); |
637 | 0 | step2[1] = WRAPLOW(dct_const_round_shift(temp2)); |
638 | 0 | temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
639 | 0 | temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
640 | 0 | step2[2] = WRAPLOW(dct_const_round_shift(temp1)); |
641 | 0 | step2[3] = WRAPLOW(dct_const_round_shift(temp2)); |
642 | 0 | step2[4] = WRAPLOW(step1[4] + step1[5]); |
643 | 0 | step2[5] = WRAPLOW(step1[4] - step1[5]); |
644 | 0 | step2[6] = WRAPLOW(-step1[6] + step1[7]); |
645 | 0 | step2[7] = WRAPLOW(step1[6] + step1[7]); |
646 | |
|
647 | 0 | step2[8] = step1[8]; |
648 | 0 | step2[15] = step1[15]; |
649 | 0 | temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
650 | 0 | temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
651 | 0 | step2[9] = WRAPLOW(dct_const_round_shift(temp1)); |
652 | 0 | step2[14] = WRAPLOW(dct_const_round_shift(temp2)); |
653 | 0 | temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
654 | 0 | temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
655 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
656 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
657 | 0 | step2[11] = step1[11]; |
658 | 0 | step2[12] = step1[12]; |
659 | | |
660 | | // stage 5 |
661 | 0 | step1[0] = WRAPLOW(step2[0] + step2[3]); |
662 | 0 | step1[1] = WRAPLOW(step2[1] + step2[2]); |
663 | 0 | step1[2] = WRAPLOW(step2[1] - step2[2]); |
664 | 0 | step1[3] = WRAPLOW(step2[0] - step2[3]); |
665 | 0 | step1[4] = step2[4]; |
666 | 0 | temp1 = (step2[6] - step2[5]) * cospi_16_64; |
667 | 0 | temp2 = (step2[5] + step2[6]) * cospi_16_64; |
668 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
669 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
670 | 0 | step1[7] = step2[7]; |
671 | |
|
672 | 0 | step1[8] = WRAPLOW(step2[8] + step2[11]); |
673 | 0 | step1[9] = WRAPLOW(step2[9] + step2[10]); |
674 | 0 | step1[10] = WRAPLOW(step2[9] - step2[10]); |
675 | 0 | step1[11] = WRAPLOW(step2[8] - step2[11]); |
676 | 0 | step1[12] = WRAPLOW(-step2[12] + step2[15]); |
677 | 0 | step1[13] = WRAPLOW(-step2[13] + step2[14]); |
678 | 0 | step1[14] = WRAPLOW(step2[13] + step2[14]); |
679 | 0 | step1[15] = WRAPLOW(step2[12] + step2[15]); |
680 | | |
681 | | // stage 6 |
682 | 0 | step2[0] = WRAPLOW(step1[0] + step1[7]); |
683 | 0 | step2[1] = WRAPLOW(step1[1] + step1[6]); |
684 | 0 | step2[2] = WRAPLOW(step1[2] + step1[5]); |
685 | 0 | step2[3] = WRAPLOW(step1[3] + step1[4]); |
686 | 0 | step2[4] = WRAPLOW(step1[3] - step1[4]); |
687 | 0 | step2[5] = WRAPLOW(step1[2] - step1[5]); |
688 | 0 | step2[6] = WRAPLOW(step1[1] - step1[6]); |
689 | 0 | step2[7] = WRAPLOW(step1[0] - step1[7]); |
690 | 0 | step2[8] = step1[8]; |
691 | 0 | step2[9] = step1[9]; |
692 | 0 | temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
693 | 0 | temp2 = (step1[10] + step1[13]) * cospi_16_64; |
694 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
695 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
696 | 0 | temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
697 | 0 | temp2 = (step1[11] + step1[12]) * cospi_16_64; |
698 | 0 | step2[11] = WRAPLOW(dct_const_round_shift(temp1)); |
699 | 0 | step2[12] = WRAPLOW(dct_const_round_shift(temp2)); |
700 | 0 | step2[14] = step1[14]; |
701 | 0 | step2[15] = step1[15]; |
702 | | |
703 | | // stage 7 |
704 | 0 | output[0] = (tran_low_t)WRAPLOW(step2[0] + step2[15]); |
705 | 0 | output[1] = (tran_low_t)WRAPLOW(step2[1] + step2[14]); |
706 | 0 | output[2] = (tran_low_t)WRAPLOW(step2[2] + step2[13]); |
707 | 0 | output[3] = (tran_low_t)WRAPLOW(step2[3] + step2[12]); |
708 | 0 | output[4] = (tran_low_t)WRAPLOW(step2[4] + step2[11]); |
709 | 0 | output[5] = (tran_low_t)WRAPLOW(step2[5] + step2[10]); |
710 | 0 | output[6] = (tran_low_t)WRAPLOW(step2[6] + step2[9]); |
711 | 0 | output[7] = (tran_low_t)WRAPLOW(step2[7] + step2[8]); |
712 | 0 | output[8] = (tran_low_t)WRAPLOW(step2[7] - step2[8]); |
713 | 0 | output[9] = (tran_low_t)WRAPLOW(step2[6] - step2[9]); |
714 | 0 | output[10] = (tran_low_t)WRAPLOW(step2[5] - step2[10]); |
715 | 0 | output[11] = (tran_low_t)WRAPLOW(step2[4] - step2[11]); |
716 | 0 | output[12] = (tran_low_t)WRAPLOW(step2[3] - step2[12]); |
717 | 0 | output[13] = (tran_low_t)WRAPLOW(step2[2] - step2[13]); |
718 | 0 | output[14] = (tran_low_t)WRAPLOW(step2[1] - step2[14]); |
719 | 0 | output[15] = (tran_low_t)WRAPLOW(step2[0] - step2[15]); |
720 | 0 | } |
721 | | |
722 | | void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, |
723 | 0 | int stride) { |
724 | 0 | int i, j; |
725 | 0 | tran_low_t out[16 * 16]; |
726 | 0 | tran_low_t *outptr = out; |
727 | 0 | tran_low_t temp_in[16], temp_out[16]; |
728 | | |
729 | | // First transform rows |
730 | 0 | for (i = 0; i < 16; ++i) { |
731 | 0 | idct16_c(input, outptr); |
732 | 0 | input += 16; |
733 | 0 | outptr += 16; |
734 | 0 | } |
735 | | |
736 | | // Then transform columns |
737 | 0 | for (i = 0; i < 16; ++i) { |
738 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
739 | 0 | idct16_c(temp_in, temp_out); |
740 | 0 | for (j = 0; j < 16; ++j) { |
741 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
742 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
743 | 0 | } |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | | void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, |
748 | 0 | int stride) { |
749 | 0 | int i, j; |
750 | 0 | tran_low_t out[16 * 16] = { 0 }; |
751 | 0 | tran_low_t *outptr = out; |
752 | 0 | tran_low_t temp_in[16], temp_out[16]; |
753 | | |
754 | | // First transform rows. Since all non-zero dct coefficients are in |
755 | | // upper-left 8x8 area, we only need to calculate first 8 rows here. |
756 | 0 | for (i = 0; i < 8; ++i) { |
757 | 0 | idct16_c(input, outptr); |
758 | 0 | input += 16; |
759 | 0 | outptr += 16; |
760 | 0 | } |
761 | | |
762 | | // Then transform columns |
763 | 0 | for (i = 0; i < 16; ++i) { |
764 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
765 | 0 | idct16_c(temp_in, temp_out); |
766 | 0 | for (j = 0; j < 16; ++j) { |
767 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
768 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
769 | 0 | } |
770 | 0 | } |
771 | 0 | } |
772 | | |
773 | | void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, |
774 | 0 | int stride) { |
775 | 0 | int i, j; |
776 | 0 | tran_low_t out[16 * 16] = { 0 }; |
777 | 0 | tran_low_t *outptr = out; |
778 | 0 | tran_low_t temp_in[16], temp_out[16]; |
779 | | |
780 | | // First transform rows. Since all non-zero dct coefficients are in |
781 | | // upper-left 4x4 area, we only need to calculate first 4 rows here. |
782 | 0 | for (i = 0; i < 4; ++i) { |
783 | 0 | idct16_c(input, outptr); |
784 | 0 | input += 16; |
785 | 0 | outptr += 16; |
786 | 0 | } |
787 | | |
788 | | // Then transform columns |
789 | 0 | for (i = 0; i < 16; ++i) { |
790 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
791 | 0 | idct16_c(temp_in, temp_out); |
792 | 0 | for (j = 0; j < 16; ++j) { |
793 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
794 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
795 | 0 | } |
796 | 0 | } |
797 | 0 | } |
798 | | |
799 | 0 | void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
800 | 0 | int i, j; |
801 | 0 | tran_high_t a1; |
802 | 0 | tran_low_t out = |
803 | 0 | WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); |
804 | |
|
805 | 0 | out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); |
806 | 0 | a1 = ROUND_POWER_OF_TWO(out, 6); |
807 | 0 | for (j = 0; j < 16; ++j) { |
808 | 0 | for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1); |
809 | 0 | dest += stride; |
810 | 0 | } |
811 | 0 | } |
812 | | |
813 | 0 | void idct32_c(const tran_low_t *input, tran_low_t *output) { |
814 | 0 | int16_t step1[32], step2[32]; |
815 | 0 | tran_high_t temp1, temp2; |
816 | | |
817 | | // stage 1 |
818 | 0 | step1[0] = (int16_t)input[0]; |
819 | 0 | step1[1] = (int16_t)input[16]; |
820 | 0 | step1[2] = (int16_t)input[8]; |
821 | 0 | step1[3] = (int16_t)input[24]; |
822 | 0 | step1[4] = (int16_t)input[4]; |
823 | 0 | step1[5] = (int16_t)input[20]; |
824 | 0 | step1[6] = (int16_t)input[12]; |
825 | 0 | step1[7] = (int16_t)input[28]; |
826 | 0 | step1[8] = (int16_t)input[2]; |
827 | 0 | step1[9] = (int16_t)input[18]; |
828 | 0 | step1[10] = (int16_t)input[10]; |
829 | 0 | step1[11] = (int16_t)input[26]; |
830 | 0 | step1[12] = (int16_t)input[6]; |
831 | 0 | step1[13] = (int16_t)input[22]; |
832 | 0 | step1[14] = (int16_t)input[14]; |
833 | 0 | step1[15] = (int16_t)input[30]; |
834 | |
|
835 | 0 | temp1 = (int16_t)input[1] * cospi_31_64 - (int16_t)input[31] * cospi_1_64; |
836 | 0 | temp2 = (int16_t)input[1] * cospi_1_64 + (int16_t)input[31] * cospi_31_64; |
837 | 0 | step1[16] = WRAPLOW(dct_const_round_shift(temp1)); |
838 | 0 | step1[31] = WRAPLOW(dct_const_round_shift(temp2)); |
839 | |
|
840 | 0 | temp1 = (int16_t)input[17] * cospi_15_64 - (int16_t)input[15] * cospi_17_64; |
841 | 0 | temp2 = (int16_t)input[17] * cospi_17_64 + (int16_t)input[15] * cospi_15_64; |
842 | 0 | step1[17] = WRAPLOW(dct_const_round_shift(temp1)); |
843 | 0 | step1[30] = WRAPLOW(dct_const_round_shift(temp2)); |
844 | |
|
845 | 0 | temp1 = (int16_t)input[9] * cospi_23_64 - (int16_t)input[23] * cospi_9_64; |
846 | 0 | temp2 = (int16_t)input[9] * cospi_9_64 + (int16_t)input[23] * cospi_23_64; |
847 | 0 | step1[18] = WRAPLOW(dct_const_round_shift(temp1)); |
848 | 0 | step1[29] = WRAPLOW(dct_const_round_shift(temp2)); |
849 | |
|
850 | 0 | temp1 = (int16_t)input[25] * cospi_7_64 - (int16_t)input[7] * cospi_25_64; |
851 | 0 | temp2 = (int16_t)input[25] * cospi_25_64 + (int16_t)input[7] * cospi_7_64; |
852 | 0 | step1[19] = WRAPLOW(dct_const_round_shift(temp1)); |
853 | 0 | step1[28] = WRAPLOW(dct_const_round_shift(temp2)); |
854 | |
|
855 | 0 | temp1 = (int16_t)input[5] * cospi_27_64 - (int16_t)input[27] * cospi_5_64; |
856 | 0 | temp2 = (int16_t)input[5] * cospi_5_64 + (int16_t)input[27] * cospi_27_64; |
857 | 0 | step1[20] = WRAPLOW(dct_const_round_shift(temp1)); |
858 | 0 | step1[27] = WRAPLOW(dct_const_round_shift(temp2)); |
859 | |
|
860 | 0 | temp1 = (int16_t)input[21] * cospi_11_64 - (int16_t)input[11] * cospi_21_64; |
861 | 0 | temp2 = (int16_t)input[21] * cospi_21_64 + (int16_t)input[11] * cospi_11_64; |
862 | 0 | step1[21] = WRAPLOW(dct_const_round_shift(temp1)); |
863 | 0 | step1[26] = WRAPLOW(dct_const_round_shift(temp2)); |
864 | |
|
865 | 0 | temp1 = (int16_t)input[13] * cospi_19_64 - (int16_t)input[19] * cospi_13_64; |
866 | 0 | temp2 = (int16_t)input[13] * cospi_13_64 + (int16_t)input[19] * cospi_19_64; |
867 | 0 | step1[22] = WRAPLOW(dct_const_round_shift(temp1)); |
868 | 0 | step1[25] = WRAPLOW(dct_const_round_shift(temp2)); |
869 | |
|
870 | 0 | temp1 = (int16_t)input[29] * cospi_3_64 - (int16_t)input[3] * cospi_29_64; |
871 | 0 | temp2 = (int16_t)input[29] * cospi_29_64 + (int16_t)input[3] * cospi_3_64; |
872 | 0 | step1[23] = WRAPLOW(dct_const_round_shift(temp1)); |
873 | 0 | step1[24] = WRAPLOW(dct_const_round_shift(temp2)); |
874 | | |
875 | | // stage 2 |
876 | 0 | step2[0] = step1[0]; |
877 | 0 | step2[1] = step1[1]; |
878 | 0 | step2[2] = step1[2]; |
879 | 0 | step2[3] = step1[3]; |
880 | 0 | step2[4] = step1[4]; |
881 | 0 | step2[5] = step1[5]; |
882 | 0 | step2[6] = step1[6]; |
883 | 0 | step2[7] = step1[7]; |
884 | |
|
885 | 0 | temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
886 | 0 | temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
887 | 0 | step2[8] = WRAPLOW(dct_const_round_shift(temp1)); |
888 | 0 | step2[15] = WRAPLOW(dct_const_round_shift(temp2)); |
889 | |
|
890 | 0 | temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
891 | 0 | temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
892 | 0 | step2[9] = WRAPLOW(dct_const_round_shift(temp1)); |
893 | 0 | step2[14] = WRAPLOW(dct_const_round_shift(temp2)); |
894 | |
|
895 | 0 | temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
896 | 0 | temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
897 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
898 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
899 | |
|
900 | 0 | temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
901 | 0 | temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
902 | 0 | step2[11] = WRAPLOW(dct_const_round_shift(temp1)); |
903 | 0 | step2[12] = WRAPLOW(dct_const_round_shift(temp2)); |
904 | |
|
905 | 0 | step2[16] = WRAPLOW(step1[16] + step1[17]); |
906 | 0 | step2[17] = WRAPLOW(step1[16] - step1[17]); |
907 | 0 | step2[18] = WRAPLOW(-step1[18] + step1[19]); |
908 | 0 | step2[19] = WRAPLOW(step1[18] + step1[19]); |
909 | 0 | step2[20] = WRAPLOW(step1[20] + step1[21]); |
910 | 0 | step2[21] = WRAPLOW(step1[20] - step1[21]); |
911 | 0 | step2[22] = WRAPLOW(-step1[22] + step1[23]); |
912 | 0 | step2[23] = WRAPLOW(step1[22] + step1[23]); |
913 | 0 | step2[24] = WRAPLOW(step1[24] + step1[25]); |
914 | 0 | step2[25] = WRAPLOW(step1[24] - step1[25]); |
915 | 0 | step2[26] = WRAPLOW(-step1[26] + step1[27]); |
916 | 0 | step2[27] = WRAPLOW(step1[26] + step1[27]); |
917 | 0 | step2[28] = WRAPLOW(step1[28] + step1[29]); |
918 | 0 | step2[29] = WRAPLOW(step1[28] - step1[29]); |
919 | 0 | step2[30] = WRAPLOW(-step1[30] + step1[31]); |
920 | 0 | step2[31] = WRAPLOW(step1[30] + step1[31]); |
921 | | |
922 | | // stage 3 |
923 | 0 | step1[0] = step2[0]; |
924 | 0 | step1[1] = step2[1]; |
925 | 0 | step1[2] = step2[2]; |
926 | 0 | step1[3] = step2[3]; |
927 | |
|
928 | 0 | temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
929 | 0 | temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
930 | 0 | step1[4] = WRAPLOW(dct_const_round_shift(temp1)); |
931 | 0 | step1[7] = WRAPLOW(dct_const_round_shift(temp2)); |
932 | 0 | temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
933 | 0 | temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
934 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
935 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
936 | |
|
937 | 0 | step1[8] = WRAPLOW(step2[8] + step2[9]); |
938 | 0 | step1[9] = WRAPLOW(step2[8] - step2[9]); |
939 | 0 | step1[10] = WRAPLOW(-step2[10] + step2[11]); |
940 | 0 | step1[11] = WRAPLOW(step2[10] + step2[11]); |
941 | 0 | step1[12] = WRAPLOW(step2[12] + step2[13]); |
942 | 0 | step1[13] = WRAPLOW(step2[12] - step2[13]); |
943 | 0 | step1[14] = WRAPLOW(-step2[14] + step2[15]); |
944 | 0 | step1[15] = WRAPLOW(step2[14] + step2[15]); |
945 | |
|
946 | 0 | step1[16] = step2[16]; |
947 | 0 | step1[31] = step2[31]; |
948 | 0 | temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; |
949 | 0 | temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; |
950 | 0 | step1[17] = WRAPLOW(dct_const_round_shift(temp1)); |
951 | 0 | step1[30] = WRAPLOW(dct_const_round_shift(temp2)); |
952 | 0 | temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; |
953 | 0 | temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; |
954 | 0 | step1[18] = WRAPLOW(dct_const_round_shift(temp1)); |
955 | 0 | step1[29] = WRAPLOW(dct_const_round_shift(temp2)); |
956 | 0 | step1[19] = step2[19]; |
957 | 0 | step1[20] = step2[20]; |
958 | 0 | temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; |
959 | 0 | temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; |
960 | 0 | step1[21] = WRAPLOW(dct_const_round_shift(temp1)); |
961 | 0 | step1[26] = WRAPLOW(dct_const_round_shift(temp2)); |
962 | 0 | temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; |
963 | 0 | temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; |
964 | 0 | step1[22] = WRAPLOW(dct_const_round_shift(temp1)); |
965 | 0 | step1[25] = WRAPLOW(dct_const_round_shift(temp2)); |
966 | 0 | step1[23] = step2[23]; |
967 | 0 | step1[24] = step2[24]; |
968 | 0 | step1[27] = step2[27]; |
969 | 0 | step1[28] = step2[28]; |
970 | | |
971 | | // stage 4 |
972 | 0 | temp1 = (step1[0] + step1[1]) * cospi_16_64; |
973 | 0 | temp2 = (step1[0] - step1[1]) * cospi_16_64; |
974 | 0 | step2[0] = WRAPLOW(dct_const_round_shift(temp1)); |
975 | 0 | step2[1] = WRAPLOW(dct_const_round_shift(temp2)); |
976 | 0 | temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
977 | 0 | temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
978 | 0 | step2[2] = WRAPLOW(dct_const_round_shift(temp1)); |
979 | 0 | step2[3] = WRAPLOW(dct_const_round_shift(temp2)); |
980 | 0 | step2[4] = WRAPLOW(step1[4] + step1[5]); |
981 | 0 | step2[5] = WRAPLOW(step1[4] - step1[5]); |
982 | 0 | step2[6] = WRAPLOW(-step1[6] + step1[7]); |
983 | 0 | step2[7] = WRAPLOW(step1[6] + step1[7]); |
984 | |
|
985 | 0 | step2[8] = step1[8]; |
986 | 0 | step2[15] = step1[15]; |
987 | 0 | temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
988 | 0 | temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
989 | 0 | step2[9] = WRAPLOW(dct_const_round_shift(temp1)); |
990 | 0 | step2[14] = WRAPLOW(dct_const_round_shift(temp2)); |
991 | 0 | temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
992 | 0 | temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
993 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
994 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
995 | 0 | step2[11] = step1[11]; |
996 | 0 | step2[12] = step1[12]; |
997 | |
|
998 | 0 | step2[16] = WRAPLOW(step1[16] + step1[19]); |
999 | 0 | step2[17] = WRAPLOW(step1[17] + step1[18]); |
1000 | 0 | step2[18] = WRAPLOW(step1[17] - step1[18]); |
1001 | 0 | step2[19] = WRAPLOW(step1[16] - step1[19]); |
1002 | 0 | step2[20] = WRAPLOW(-step1[20] + step1[23]); |
1003 | 0 | step2[21] = WRAPLOW(-step1[21] + step1[22]); |
1004 | 0 | step2[22] = WRAPLOW(step1[21] + step1[22]); |
1005 | 0 | step2[23] = WRAPLOW(step1[20] + step1[23]); |
1006 | |
|
1007 | 0 | step2[24] = WRAPLOW(step1[24] + step1[27]); |
1008 | 0 | step2[25] = WRAPLOW(step1[25] + step1[26]); |
1009 | 0 | step2[26] = WRAPLOW(step1[25] - step1[26]); |
1010 | 0 | step2[27] = WRAPLOW(step1[24] - step1[27]); |
1011 | 0 | step2[28] = WRAPLOW(-step1[28] + step1[31]); |
1012 | 0 | step2[29] = WRAPLOW(-step1[29] + step1[30]); |
1013 | 0 | step2[30] = WRAPLOW(step1[29] + step1[30]); |
1014 | 0 | step2[31] = WRAPLOW(step1[28] + step1[31]); |
1015 | | |
1016 | | // stage 5 |
1017 | 0 | step1[0] = WRAPLOW(step2[0] + step2[3]); |
1018 | 0 | step1[1] = WRAPLOW(step2[1] + step2[2]); |
1019 | 0 | step1[2] = WRAPLOW(step2[1] - step2[2]); |
1020 | 0 | step1[3] = WRAPLOW(step2[0] - step2[3]); |
1021 | 0 | step1[4] = step2[4]; |
1022 | 0 | temp1 = (step2[6] - step2[5]) * cospi_16_64; |
1023 | 0 | temp2 = (step2[5] + step2[6]) * cospi_16_64; |
1024 | 0 | step1[5] = WRAPLOW(dct_const_round_shift(temp1)); |
1025 | 0 | step1[6] = WRAPLOW(dct_const_round_shift(temp2)); |
1026 | 0 | step1[7] = step2[7]; |
1027 | |
|
1028 | 0 | step1[8] = WRAPLOW(step2[8] + step2[11]); |
1029 | 0 | step1[9] = WRAPLOW(step2[9] + step2[10]); |
1030 | 0 | step1[10] = WRAPLOW(step2[9] - step2[10]); |
1031 | 0 | step1[11] = WRAPLOW(step2[8] - step2[11]); |
1032 | 0 | step1[12] = WRAPLOW(-step2[12] + step2[15]); |
1033 | 0 | step1[13] = WRAPLOW(-step2[13] + step2[14]); |
1034 | 0 | step1[14] = WRAPLOW(step2[13] + step2[14]); |
1035 | 0 | step1[15] = WRAPLOW(step2[12] + step2[15]); |
1036 | |
|
1037 | 0 | step1[16] = step2[16]; |
1038 | 0 | step1[17] = step2[17]; |
1039 | 0 | temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; |
1040 | 0 | temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; |
1041 | 0 | step1[18] = WRAPLOW(dct_const_round_shift(temp1)); |
1042 | 0 | step1[29] = WRAPLOW(dct_const_round_shift(temp2)); |
1043 | 0 | temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; |
1044 | 0 | temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; |
1045 | 0 | step1[19] = WRAPLOW(dct_const_round_shift(temp1)); |
1046 | 0 | step1[28] = WRAPLOW(dct_const_round_shift(temp2)); |
1047 | 0 | temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; |
1048 | 0 | temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; |
1049 | 0 | step1[20] = WRAPLOW(dct_const_round_shift(temp1)); |
1050 | 0 | step1[27] = WRAPLOW(dct_const_round_shift(temp2)); |
1051 | 0 | temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; |
1052 | 0 | temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; |
1053 | 0 | step1[21] = WRAPLOW(dct_const_round_shift(temp1)); |
1054 | 0 | step1[26] = WRAPLOW(dct_const_round_shift(temp2)); |
1055 | 0 | step1[22] = step2[22]; |
1056 | 0 | step1[23] = step2[23]; |
1057 | 0 | step1[24] = step2[24]; |
1058 | 0 | step1[25] = step2[25]; |
1059 | 0 | step1[30] = step2[30]; |
1060 | 0 | step1[31] = step2[31]; |
1061 | | |
1062 | | // stage 6 |
1063 | 0 | step2[0] = WRAPLOW(step1[0] + step1[7]); |
1064 | 0 | step2[1] = WRAPLOW(step1[1] + step1[6]); |
1065 | 0 | step2[2] = WRAPLOW(step1[2] + step1[5]); |
1066 | 0 | step2[3] = WRAPLOW(step1[3] + step1[4]); |
1067 | 0 | step2[4] = WRAPLOW(step1[3] - step1[4]); |
1068 | 0 | step2[5] = WRAPLOW(step1[2] - step1[5]); |
1069 | 0 | step2[6] = WRAPLOW(step1[1] - step1[6]); |
1070 | 0 | step2[7] = WRAPLOW(step1[0] - step1[7]); |
1071 | 0 | step2[8] = step1[8]; |
1072 | 0 | step2[9] = step1[9]; |
1073 | 0 | temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
1074 | 0 | temp2 = (step1[10] + step1[13]) * cospi_16_64; |
1075 | 0 | step2[10] = WRAPLOW(dct_const_round_shift(temp1)); |
1076 | 0 | step2[13] = WRAPLOW(dct_const_round_shift(temp2)); |
1077 | 0 | temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
1078 | 0 | temp2 = (step1[11] + step1[12]) * cospi_16_64; |
1079 | 0 | step2[11] = WRAPLOW(dct_const_round_shift(temp1)); |
1080 | 0 | step2[12] = WRAPLOW(dct_const_round_shift(temp2)); |
1081 | 0 | step2[14] = step1[14]; |
1082 | 0 | step2[15] = step1[15]; |
1083 | |
|
1084 | 0 | step2[16] = WRAPLOW(step1[16] + step1[23]); |
1085 | 0 | step2[17] = WRAPLOW(step1[17] + step1[22]); |
1086 | 0 | step2[18] = WRAPLOW(step1[18] + step1[21]); |
1087 | 0 | step2[19] = WRAPLOW(step1[19] + step1[20]); |
1088 | 0 | step2[20] = WRAPLOW(step1[19] - step1[20]); |
1089 | 0 | step2[21] = WRAPLOW(step1[18] - step1[21]); |
1090 | 0 | step2[22] = WRAPLOW(step1[17] - step1[22]); |
1091 | 0 | step2[23] = WRAPLOW(step1[16] - step1[23]); |
1092 | |
|
1093 | 0 | step2[24] = WRAPLOW(-step1[24] + step1[31]); |
1094 | 0 | step2[25] = WRAPLOW(-step1[25] + step1[30]); |
1095 | 0 | step2[26] = WRAPLOW(-step1[26] + step1[29]); |
1096 | 0 | step2[27] = WRAPLOW(-step1[27] + step1[28]); |
1097 | 0 | step2[28] = WRAPLOW(step1[27] + step1[28]); |
1098 | 0 | step2[29] = WRAPLOW(step1[26] + step1[29]); |
1099 | 0 | step2[30] = WRAPLOW(step1[25] + step1[30]); |
1100 | 0 | step2[31] = WRAPLOW(step1[24] + step1[31]); |
1101 | | |
1102 | | // stage 7 |
1103 | 0 | step1[0] = WRAPLOW(step2[0] + step2[15]); |
1104 | 0 | step1[1] = WRAPLOW(step2[1] + step2[14]); |
1105 | 0 | step1[2] = WRAPLOW(step2[2] + step2[13]); |
1106 | 0 | step1[3] = WRAPLOW(step2[3] + step2[12]); |
1107 | 0 | step1[4] = WRAPLOW(step2[4] + step2[11]); |
1108 | 0 | step1[5] = WRAPLOW(step2[5] + step2[10]); |
1109 | 0 | step1[6] = WRAPLOW(step2[6] + step2[9]); |
1110 | 0 | step1[7] = WRAPLOW(step2[7] + step2[8]); |
1111 | 0 | step1[8] = WRAPLOW(step2[7] - step2[8]); |
1112 | 0 | step1[9] = WRAPLOW(step2[6] - step2[9]); |
1113 | 0 | step1[10] = WRAPLOW(step2[5] - step2[10]); |
1114 | 0 | step1[11] = WRAPLOW(step2[4] - step2[11]); |
1115 | 0 | step1[12] = WRAPLOW(step2[3] - step2[12]); |
1116 | 0 | step1[13] = WRAPLOW(step2[2] - step2[13]); |
1117 | 0 | step1[14] = WRAPLOW(step2[1] - step2[14]); |
1118 | 0 | step1[15] = WRAPLOW(step2[0] - step2[15]); |
1119 | |
|
1120 | 0 | step1[16] = step2[16]; |
1121 | 0 | step1[17] = step2[17]; |
1122 | 0 | step1[18] = step2[18]; |
1123 | 0 | step1[19] = step2[19]; |
1124 | 0 | temp1 = (-step2[20] + step2[27]) * cospi_16_64; |
1125 | 0 | temp2 = (step2[20] + step2[27]) * cospi_16_64; |
1126 | 0 | step1[20] = WRAPLOW(dct_const_round_shift(temp1)); |
1127 | 0 | step1[27] = WRAPLOW(dct_const_round_shift(temp2)); |
1128 | 0 | temp1 = (-step2[21] + step2[26]) * cospi_16_64; |
1129 | 0 | temp2 = (step2[21] + step2[26]) * cospi_16_64; |
1130 | 0 | step1[21] = WRAPLOW(dct_const_round_shift(temp1)); |
1131 | 0 | step1[26] = WRAPLOW(dct_const_round_shift(temp2)); |
1132 | 0 | temp1 = (-step2[22] + step2[25]) * cospi_16_64; |
1133 | 0 | temp2 = (step2[22] + step2[25]) * cospi_16_64; |
1134 | 0 | step1[22] = WRAPLOW(dct_const_round_shift(temp1)); |
1135 | 0 | step1[25] = WRAPLOW(dct_const_round_shift(temp2)); |
1136 | 0 | temp1 = (-step2[23] + step2[24]) * cospi_16_64; |
1137 | 0 | temp2 = (step2[23] + step2[24]) * cospi_16_64; |
1138 | 0 | step1[23] = WRAPLOW(dct_const_round_shift(temp1)); |
1139 | 0 | step1[24] = WRAPLOW(dct_const_round_shift(temp2)); |
1140 | 0 | step1[28] = step2[28]; |
1141 | 0 | step1[29] = step2[29]; |
1142 | 0 | step1[30] = step2[30]; |
1143 | 0 | step1[31] = step2[31]; |
1144 | | |
1145 | | // final stage |
1146 | 0 | output[0] = WRAPLOW(step1[0] + step1[31]); |
1147 | 0 | output[1] = WRAPLOW(step1[1] + step1[30]); |
1148 | 0 | output[2] = WRAPLOW(step1[2] + step1[29]); |
1149 | 0 | output[3] = WRAPLOW(step1[3] + step1[28]); |
1150 | 0 | output[4] = WRAPLOW(step1[4] + step1[27]); |
1151 | 0 | output[5] = WRAPLOW(step1[5] + step1[26]); |
1152 | 0 | output[6] = WRAPLOW(step1[6] + step1[25]); |
1153 | 0 | output[7] = WRAPLOW(step1[7] + step1[24]); |
1154 | 0 | output[8] = WRAPLOW(step1[8] + step1[23]); |
1155 | 0 | output[9] = WRAPLOW(step1[9] + step1[22]); |
1156 | 0 | output[10] = WRAPLOW(step1[10] + step1[21]); |
1157 | 0 | output[11] = WRAPLOW(step1[11] + step1[20]); |
1158 | 0 | output[12] = WRAPLOW(step1[12] + step1[19]); |
1159 | 0 | output[13] = WRAPLOW(step1[13] + step1[18]); |
1160 | 0 | output[14] = WRAPLOW(step1[14] + step1[17]); |
1161 | 0 | output[15] = WRAPLOW(step1[15] + step1[16]); |
1162 | 0 | output[16] = WRAPLOW(step1[15] - step1[16]); |
1163 | 0 | output[17] = WRAPLOW(step1[14] - step1[17]); |
1164 | 0 | output[18] = WRAPLOW(step1[13] - step1[18]); |
1165 | 0 | output[19] = WRAPLOW(step1[12] - step1[19]); |
1166 | 0 | output[20] = WRAPLOW(step1[11] - step1[20]); |
1167 | 0 | output[21] = WRAPLOW(step1[10] - step1[21]); |
1168 | 0 | output[22] = WRAPLOW(step1[9] - step1[22]); |
1169 | 0 | output[23] = WRAPLOW(step1[8] - step1[23]); |
1170 | 0 | output[24] = WRAPLOW(step1[7] - step1[24]); |
1171 | 0 | output[25] = WRAPLOW(step1[6] - step1[25]); |
1172 | 0 | output[26] = WRAPLOW(step1[5] - step1[26]); |
1173 | 0 | output[27] = WRAPLOW(step1[4] - step1[27]); |
1174 | 0 | output[28] = WRAPLOW(step1[3] - step1[28]); |
1175 | 0 | output[29] = WRAPLOW(step1[2] - step1[29]); |
1176 | 0 | output[30] = WRAPLOW(step1[1] - step1[30]); |
1177 | 0 | output[31] = WRAPLOW(step1[0] - step1[31]); |
1178 | 0 | } |
1179 | | |
1180 | | void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, |
1181 | 0 | int stride) { |
1182 | 0 | int i, j; |
1183 | 0 | tran_low_t out[32 * 32]; |
1184 | 0 | tran_low_t *outptr = out; |
1185 | 0 | tran_low_t temp_in[32], temp_out[32]; |
1186 | | |
1187 | | // Rows |
1188 | 0 | for (i = 0; i < 32; ++i) { |
1189 | 0 | int16_t zero_coeff = 0; |
1190 | 0 | for (j = 0; j < 32; ++j) zero_coeff |= input[j]; |
1191 | |
|
1192 | 0 | if (zero_coeff) |
1193 | 0 | idct32_c(input, outptr); |
1194 | 0 | else |
1195 | 0 | memset(outptr, 0, sizeof(tran_low_t) * 32); |
1196 | 0 | input += 32; |
1197 | 0 | outptr += 32; |
1198 | 0 | } |
1199 | | |
1200 | | // Columns |
1201 | 0 | for (i = 0; i < 32; ++i) { |
1202 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
1203 | 0 | idct32_c(temp_in, temp_out); |
1204 | 0 | for (j = 0; j < 32; ++j) { |
1205 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1206 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
1207 | 0 | } |
1208 | 0 | } |
1209 | 0 | } |
1210 | | |
1211 | | void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, |
1212 | 0 | int stride) { |
1213 | 0 | int i, j; |
1214 | 0 | tran_low_t out[32 * 32] = { 0 }; |
1215 | 0 | tran_low_t *outptr = out; |
1216 | 0 | tran_low_t temp_in[32], temp_out[32]; |
1217 | | |
1218 | | // Rows |
1219 | | // Only upper-left 16x16 has non-zero coeff |
1220 | 0 | for (i = 0; i < 16; ++i) { |
1221 | 0 | idct32_c(input, outptr); |
1222 | 0 | input += 32; |
1223 | 0 | outptr += 32; |
1224 | 0 | } |
1225 | | |
1226 | | // Columns |
1227 | 0 | for (i = 0; i < 32; ++i) { |
1228 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
1229 | 0 | idct32_c(temp_in, temp_out); |
1230 | 0 | for (j = 0; j < 32; ++j) { |
1231 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1232 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
1233 | 0 | } |
1234 | 0 | } |
1235 | 0 | } |
1236 | | |
1237 | | void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, |
1238 | 0 | int stride) { |
1239 | 0 | int i, j; |
1240 | 0 | tran_low_t out[32 * 32] = { 0 }; |
1241 | 0 | tran_low_t *outptr = out; |
1242 | 0 | tran_low_t temp_in[32], temp_out[32]; |
1243 | | |
1244 | | // Rows |
1245 | | // Only upper-left 8x8 has non-zero coeff |
1246 | 0 | for (i = 0; i < 8; ++i) { |
1247 | 0 | idct32_c(input, outptr); |
1248 | 0 | input += 32; |
1249 | 0 | outptr += 32; |
1250 | 0 | } |
1251 | | |
1252 | | // Columns |
1253 | 0 | for (i = 0; i < 32; ++i) { |
1254 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
1255 | 0 | idct32_c(temp_in, temp_out); |
1256 | 0 | for (j = 0; j < 32; ++j) { |
1257 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1258 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
1259 | 0 | } |
1260 | 0 | } |
1261 | 0 | } |
1262 | | |
1263 | 0 | void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
1264 | 0 | int i, j; |
1265 | 0 | tran_high_t a1; |
1266 | 0 | tran_low_t out = |
1267 | 0 | WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); |
1268 | |
|
1269 | 0 | out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); |
1270 | 0 | a1 = ROUND_POWER_OF_TWO(out, 6); |
1271 | |
|
1272 | 0 | for (j = 0; j < 32; ++j) { |
1273 | 0 | for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1); |
1274 | 0 | dest += stride; |
1275 | 0 | } |
1276 | 0 | } |
1277 | | |
1278 | | #if CONFIG_VP9_HIGHBITDEPTH |
1279 | | |
1280 | | // 12 signal input bits + 7 2D forward transform amplify bits + 5 1D inverse |
1281 | | // transform amplify bits + 1 bit for contingency in rounding and quantizing |
1282 | 0 | #define HIGHBD_VALID_TXFM_MAGNITUDE_RANGE (1 << 25) |
1283 | | |
1284 | | static INLINE int detect_invalid_highbd_input(const tran_low_t *input, |
1285 | 0 | int size) { |
1286 | 0 | int i; |
1287 | 0 | for (i = 0; i < size; ++i) |
1288 | 0 | if (abs(input[i]) >= HIGHBD_VALID_TXFM_MAGNITUDE_RANGE) return 1; |
1289 | 0 | return 0; |
1290 | 0 | } |
1291 | | |
1292 | | void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, |
1293 | 0 | int stride, int bd) { |
1294 | | /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
1295 | | 0.5 shifts per pixel. */ |
1296 | 0 | int i; |
1297 | 0 | tran_low_t output[16]; |
1298 | 0 | tran_high_t a1, b1, c1, d1, e1; |
1299 | 0 | const tran_low_t *ip = input; |
1300 | 0 | tran_low_t *op = output; |
1301 | |
|
1302 | 0 | for (i = 0; i < 4; i++) { |
1303 | 0 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
1304 | 0 | c1 = ip[1] >> UNIT_QUANT_SHIFT; |
1305 | 0 | d1 = ip[2] >> UNIT_QUANT_SHIFT; |
1306 | 0 | b1 = ip[3] >> UNIT_QUANT_SHIFT; |
1307 | 0 | a1 += c1; |
1308 | 0 | d1 -= b1; |
1309 | 0 | e1 = (a1 - d1) >> 1; |
1310 | 0 | b1 = e1 - b1; |
1311 | 0 | c1 = e1 - c1; |
1312 | 0 | a1 -= b1; |
1313 | 0 | d1 += c1; |
1314 | 0 | op[0] = HIGHBD_WRAPLOW(a1, bd); |
1315 | 0 | op[1] = HIGHBD_WRAPLOW(b1, bd); |
1316 | 0 | op[2] = HIGHBD_WRAPLOW(c1, bd); |
1317 | 0 | op[3] = HIGHBD_WRAPLOW(d1, bd); |
1318 | 0 | ip += 4; |
1319 | 0 | op += 4; |
1320 | 0 | } |
1321 | |
|
1322 | 0 | ip = output; |
1323 | 0 | for (i = 0; i < 4; i++) { |
1324 | 0 | a1 = ip[4 * 0]; |
1325 | 0 | c1 = ip[4 * 1]; |
1326 | 0 | d1 = ip[4 * 2]; |
1327 | 0 | b1 = ip[4 * 3]; |
1328 | 0 | a1 += c1; |
1329 | 0 | d1 -= b1; |
1330 | 0 | e1 = (a1 - d1) >> 1; |
1331 | 0 | b1 = e1 - b1; |
1332 | 0 | c1 = e1 - c1; |
1333 | 0 | a1 -= b1; |
1334 | 0 | d1 += c1; |
1335 | 0 | dest[stride * 0] = |
1336 | 0 | highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd); |
1337 | 0 | dest[stride * 1] = |
1338 | 0 | highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd); |
1339 | 0 | dest[stride * 2] = |
1340 | 0 | highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd); |
1341 | 0 | dest[stride * 3] = |
1342 | 0 | highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd); |
1343 | |
|
1344 | 0 | ip++; |
1345 | 0 | dest++; |
1346 | 0 | } |
1347 | 0 | } |
1348 | | |
1349 | | void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint16_t *dest, |
1350 | 0 | int stride, int bd) { |
1351 | 0 | int i; |
1352 | 0 | tran_high_t a1, e1; |
1353 | 0 | tran_low_t tmp[4]; |
1354 | 0 | const tran_low_t *ip = input; |
1355 | 0 | tran_low_t *op = tmp; |
1356 | 0 | (void)bd; |
1357 | |
|
1358 | 0 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
1359 | 0 | e1 = a1 >> 1; |
1360 | 0 | a1 -= e1; |
1361 | 0 | op[0] = HIGHBD_WRAPLOW(a1, bd); |
1362 | 0 | op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); |
1363 | |
|
1364 | 0 | ip = tmp; |
1365 | 0 | for (i = 0; i < 4; i++) { |
1366 | 0 | e1 = ip[0] >> 1; |
1367 | 0 | a1 = ip[0] - e1; |
1368 | 0 | dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
1369 | 0 | dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], e1, bd); |
1370 | 0 | dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], e1, bd); |
1371 | 0 | dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], e1, bd); |
1372 | 0 | ip++; |
1373 | 0 | dest++; |
1374 | 0 | } |
1375 | 0 | } |
1376 | | |
1377 | 0 | void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1378 | 0 | tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1379 | 0 | tran_low_t x0 = input[0]; |
1380 | 0 | tran_low_t x1 = input[1]; |
1381 | 0 | tran_low_t x2 = input[2]; |
1382 | 0 | tran_low_t x3 = input[3]; |
1383 | 0 | (void)bd; |
1384 | |
|
1385 | 0 | if (detect_invalid_highbd_input(input, 4)) { |
1386 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1387 | | assert(0 && "invalid highbd txfm input"); |
1388 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1389 | 0 | memset(output, 0, sizeof(*output) * 4); |
1390 | 0 | return; |
1391 | 0 | } |
1392 | | |
1393 | 0 | if (!(x0 | x1 | x2 | x3)) { |
1394 | 0 | memset(output, 0, 4 * sizeof(*output)); |
1395 | 0 | return; |
1396 | 0 | } |
1397 | | |
1398 | 0 | s0 = (tran_high_t)sinpi_1_9 * x0; |
1399 | 0 | s1 = (tran_high_t)sinpi_2_9 * x0; |
1400 | 0 | s2 = (tran_high_t)sinpi_3_9 * x1; |
1401 | 0 | s3 = (tran_high_t)sinpi_4_9 * x2; |
1402 | 0 | s4 = (tran_high_t)sinpi_1_9 * x2; |
1403 | 0 | s5 = (tran_high_t)sinpi_2_9 * x3; |
1404 | 0 | s6 = (tran_high_t)sinpi_4_9 * x3; |
1405 | 0 | s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); |
1406 | |
|
1407 | 0 | s0 = s0 + s3 + s5; |
1408 | 0 | s1 = s1 - s4 - s6; |
1409 | 0 | s3 = s2; |
1410 | 0 | s2 = sinpi_3_9 * s7; |
1411 | | |
1412 | | // 1-D transform scaling factor is sqrt(2). |
1413 | | // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
1414 | | // + 1b (addition) = 29b. |
1415 | | // Hence the output bit depth is 15b. |
1416 | 0 | output[0] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s3), bd); |
1417 | 0 | output[1] = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s3), bd); |
1418 | 0 | output[2] = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); |
1419 | 0 | output[3] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); |
1420 | 0 | } |
1421 | | |
1422 | 0 | void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1423 | 0 | tran_low_t step[4]; |
1424 | 0 | tran_high_t temp1, temp2; |
1425 | 0 | (void)bd; |
1426 | |
|
1427 | 0 | if (detect_invalid_highbd_input(input, 4)) { |
1428 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1429 | | assert(0 && "invalid highbd txfm input"); |
1430 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1431 | 0 | memset(output, 0, sizeof(*output) * 4); |
1432 | 0 | return; |
1433 | 0 | } |
1434 | | |
1435 | | // stage 1 |
1436 | 0 | temp1 = (input[0] + input[2]) * (tran_high_t)cospi_16_64; |
1437 | 0 | temp2 = (input[0] - input[2]) * (tran_high_t)cospi_16_64; |
1438 | 0 | step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1439 | 0 | step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1440 | 0 | temp1 = |
1441 | 0 | input[1] * (tran_high_t)cospi_24_64 - input[3] * (tran_high_t)cospi_8_64; |
1442 | 0 | temp2 = |
1443 | 0 | input[1] * (tran_high_t)cospi_8_64 + input[3] * (tran_high_t)cospi_24_64; |
1444 | 0 | step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1445 | 0 | step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1446 | | |
1447 | | // stage 2 |
1448 | 0 | output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); |
1449 | 0 | output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); |
1450 | 0 | output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); |
1451 | 0 | output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); |
1452 | 0 | } |
1453 | | |
1454 | | void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint16_t *dest, |
1455 | 0 | int stride, int bd) { |
1456 | 0 | int i, j; |
1457 | 0 | tran_low_t out[4 * 4]; |
1458 | 0 | tran_low_t *outptr = out; |
1459 | 0 | tran_low_t temp_in[4], temp_out[4]; |
1460 | | |
1461 | | // Rows |
1462 | 0 | for (i = 0; i < 4; ++i) { |
1463 | 0 | vpx_highbd_idct4_c(input, outptr, bd); |
1464 | 0 | input += 4; |
1465 | 0 | outptr += 4; |
1466 | 0 | } |
1467 | | |
1468 | | // Columns |
1469 | 0 | for (i = 0; i < 4; ++i) { |
1470 | 0 | for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; |
1471 | 0 | vpx_highbd_idct4_c(temp_in, temp_out, bd); |
1472 | 0 | for (j = 0; j < 4; ++j) { |
1473 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
1474 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
1475 | 0 | } |
1476 | 0 | } |
1477 | 0 | } |
1478 | | |
1479 | | void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint16_t *dest, |
1480 | 0 | int stride, int bd) { |
1481 | 0 | int i; |
1482 | 0 | tran_high_t a1; |
1483 | 0 | tran_low_t out = HIGHBD_WRAPLOW( |
1484 | 0 | dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); |
1485 | |
|
1486 | 0 | out = |
1487 | 0 | HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); |
1488 | 0 | a1 = ROUND_POWER_OF_TWO(out, 4); |
1489 | |
|
1490 | 0 | for (i = 0; i < 4; i++) { |
1491 | 0 | dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
1492 | 0 | dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
1493 | 0 | dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
1494 | 0 | dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
1495 | 0 | dest += stride; |
1496 | 0 | } |
1497 | 0 | } |
1498 | | |
1499 | 0 | void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1500 | 0 | tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1501 | 0 | tran_low_t x0 = input[7]; |
1502 | 0 | tran_low_t x1 = input[0]; |
1503 | 0 | tran_low_t x2 = input[5]; |
1504 | 0 | tran_low_t x3 = input[2]; |
1505 | 0 | tran_low_t x4 = input[3]; |
1506 | 0 | tran_low_t x5 = input[4]; |
1507 | 0 | tran_low_t x6 = input[1]; |
1508 | 0 | tran_low_t x7 = input[6]; |
1509 | 0 | (void)bd; |
1510 | |
|
1511 | 0 | if (detect_invalid_highbd_input(input, 8)) { |
1512 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1513 | | assert(0 && "invalid highbd txfm input"); |
1514 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1515 | 0 | memset(output, 0, sizeof(*output) * 8); |
1516 | 0 | return; |
1517 | 0 | } |
1518 | | |
1519 | 0 | if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
1520 | 0 | memset(output, 0, 8 * sizeof(*output)); |
1521 | 0 | return; |
1522 | 0 | } |
1523 | | |
1524 | | // stage 1 |
1525 | 0 | s0 = (tran_high_t)cospi_2_64 * x0 + (tran_high_t)cospi_30_64 * x1; |
1526 | 0 | s1 = (tran_high_t)cospi_30_64 * x0 - (tran_high_t)cospi_2_64 * x1; |
1527 | 0 | s2 = (tran_high_t)cospi_10_64 * x2 + (tran_high_t)cospi_22_64 * x3; |
1528 | 0 | s3 = (tran_high_t)cospi_22_64 * x2 - (tran_high_t)cospi_10_64 * x3; |
1529 | 0 | s4 = (tran_high_t)cospi_18_64 * x4 + (tran_high_t)cospi_14_64 * x5; |
1530 | 0 | s5 = (tran_high_t)cospi_14_64 * x4 - (tran_high_t)cospi_18_64 * x5; |
1531 | 0 | s6 = (tran_high_t)cospi_26_64 * x6 + (tran_high_t)cospi_6_64 * x7; |
1532 | 0 | s7 = (tran_high_t)cospi_6_64 * x6 - (tran_high_t)cospi_26_64 * x7; |
1533 | |
|
1534 | 0 | x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s4), bd); |
1535 | 0 | x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s5), bd); |
1536 | 0 | x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s6), bd); |
1537 | 0 | x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s7), bd); |
1538 | 0 | x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s4), bd); |
1539 | 0 | x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s5), bd); |
1540 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s6), bd); |
1541 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s7), bd); |
1542 | | |
1543 | | // stage 2 |
1544 | 0 | s0 = x0; |
1545 | 0 | s1 = x1; |
1546 | 0 | s2 = x2; |
1547 | 0 | s3 = x3; |
1548 | 0 | s4 = (tran_high_t)cospi_8_64 * x4 + (tran_high_t)cospi_24_64 * x5; |
1549 | 0 | s5 = (tran_high_t)cospi_24_64 * x4 - (tran_high_t)cospi_8_64 * x5; |
1550 | 0 | s6 = (tran_high_t)(-cospi_24_64) * x6 + (tran_high_t)cospi_8_64 * x7; |
1551 | 0 | s7 = (tran_high_t)cospi_8_64 * x6 + (tran_high_t)cospi_24_64 * x7; |
1552 | |
|
1553 | 0 | x0 = HIGHBD_WRAPLOW(s0 + s2, bd); |
1554 | 0 | x1 = HIGHBD_WRAPLOW(s1 + s3, bd); |
1555 | 0 | x2 = HIGHBD_WRAPLOW(s0 - s2, bd); |
1556 | 0 | x3 = HIGHBD_WRAPLOW(s1 - s3, bd); |
1557 | 0 | x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd); |
1558 | 0 | x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd); |
1559 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd); |
1560 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd); |
1561 | | |
1562 | | // stage 3 |
1563 | 0 | s2 = (tran_high_t)cospi_16_64 * (x2 + x3); |
1564 | 0 | s3 = (tran_high_t)cospi_16_64 * (x2 - x3); |
1565 | 0 | s6 = (tran_high_t)cospi_16_64 * (x6 + x7); |
1566 | 0 | s7 = (tran_high_t)cospi_16_64 * (x6 - x7); |
1567 | |
|
1568 | 0 | x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); |
1569 | 0 | x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd); |
1570 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd); |
1571 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd); |
1572 | |
|
1573 | 0 | output[0] = HIGHBD_WRAPLOW(x0, bd); |
1574 | 0 | output[1] = HIGHBD_WRAPLOW(-x4, bd); |
1575 | 0 | output[2] = HIGHBD_WRAPLOW(x6, bd); |
1576 | 0 | output[3] = HIGHBD_WRAPLOW(-x2, bd); |
1577 | 0 | output[4] = HIGHBD_WRAPLOW(x3, bd); |
1578 | 0 | output[5] = HIGHBD_WRAPLOW(-x7, bd); |
1579 | 0 | output[6] = HIGHBD_WRAPLOW(x5, bd); |
1580 | 0 | output[7] = HIGHBD_WRAPLOW(-x1, bd); |
1581 | 0 | } |
1582 | | |
1583 | 0 | void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1584 | 0 | tran_low_t step1[8], step2[8]; |
1585 | 0 | tran_high_t temp1, temp2; |
1586 | |
|
1587 | 0 | if (detect_invalid_highbd_input(input, 8)) { |
1588 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1589 | | assert(0 && "invalid highbd txfm input"); |
1590 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1591 | 0 | memset(output, 0, sizeof(*output) * 8); |
1592 | 0 | return; |
1593 | 0 | } |
1594 | | |
1595 | | // stage 1 |
1596 | 0 | step1[0] = input[0]; |
1597 | 0 | step1[2] = input[4]; |
1598 | 0 | step1[1] = input[2]; |
1599 | 0 | step1[3] = input[6]; |
1600 | 0 | temp1 = |
1601 | 0 | input[1] * (tran_high_t)cospi_28_64 - input[7] * (tran_high_t)cospi_4_64; |
1602 | 0 | temp2 = |
1603 | 0 | input[1] * (tran_high_t)cospi_4_64 + input[7] * (tran_high_t)cospi_28_64; |
1604 | 0 | step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1605 | 0 | step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1606 | 0 | temp1 = |
1607 | 0 | input[5] * (tran_high_t)cospi_12_64 - input[3] * (tran_high_t)cospi_20_64; |
1608 | 0 | temp2 = |
1609 | 0 | input[5] * (tran_high_t)cospi_20_64 + input[3] * (tran_high_t)cospi_12_64; |
1610 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1611 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1612 | | |
1613 | | // stage 2 & stage 3 - even half |
1614 | 0 | vpx_highbd_idct4_c(step1, step1, bd); |
1615 | | |
1616 | | // stage 2 - odd half |
1617 | 0 | step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); |
1618 | 0 | step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); |
1619 | 0 | step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); |
1620 | 0 | step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); |
1621 | | |
1622 | | // stage 3 - odd half |
1623 | 0 | step1[4] = step2[4]; |
1624 | 0 | temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; |
1625 | 0 | temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; |
1626 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1627 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1628 | 0 | step1[7] = step2[7]; |
1629 | | |
1630 | | // stage 4 |
1631 | 0 | output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); |
1632 | 0 | output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); |
1633 | 0 | output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); |
1634 | 0 | output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); |
1635 | 0 | output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); |
1636 | 0 | output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); |
1637 | 0 | output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); |
1638 | 0 | output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); |
1639 | 0 | } |
1640 | | |
1641 | | void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint16_t *dest, |
1642 | 0 | int stride, int bd) { |
1643 | 0 | int i, j; |
1644 | 0 | tran_low_t out[8 * 8]; |
1645 | 0 | tran_low_t *outptr = out; |
1646 | 0 | tran_low_t temp_in[8], temp_out[8]; |
1647 | | |
1648 | | // First transform rows |
1649 | 0 | for (i = 0; i < 8; ++i) { |
1650 | 0 | vpx_highbd_idct8_c(input, outptr, bd); |
1651 | 0 | input += 8; |
1652 | 0 | outptr += 8; |
1653 | 0 | } |
1654 | | |
1655 | | // Then transform columns |
1656 | 0 | for (i = 0; i < 8; ++i) { |
1657 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
1658 | 0 | vpx_highbd_idct8_c(temp_in, temp_out, bd); |
1659 | 0 | for (j = 0; j < 8; ++j) { |
1660 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
1661 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1662 | 0 | } |
1663 | 0 | } |
1664 | 0 | } |
1665 | | |
1666 | | void vpx_highbd_idct8x8_12_add_c(const tran_low_t *input, uint16_t *dest, |
1667 | 0 | int stride, int bd) { |
1668 | 0 | int i, j; |
1669 | 0 | tran_low_t out[8 * 8] = { 0 }; |
1670 | 0 | tran_low_t *outptr = out; |
1671 | 0 | tran_low_t temp_in[8], temp_out[8]; |
1672 | | |
1673 | | // First transform rows |
1674 | | // Only first 4 row has non-zero coefs |
1675 | 0 | for (i = 0; i < 4; ++i) { |
1676 | 0 | vpx_highbd_idct8_c(input, outptr, bd); |
1677 | 0 | input += 8; |
1678 | 0 | outptr += 8; |
1679 | 0 | } |
1680 | | |
1681 | | // Then transform columns |
1682 | 0 | for (i = 0; i < 8; ++i) { |
1683 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
1684 | 0 | vpx_highbd_idct8_c(temp_in, temp_out, bd); |
1685 | 0 | for (j = 0; j < 8; ++j) { |
1686 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
1687 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1688 | 0 | } |
1689 | 0 | } |
1690 | 0 | } |
1691 | | |
1692 | | void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint16_t *dest, |
1693 | 0 | int stride, int bd) { |
1694 | 0 | int i, j; |
1695 | 0 | tran_high_t a1; |
1696 | 0 | tran_low_t out = HIGHBD_WRAPLOW( |
1697 | 0 | dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); |
1698 | |
|
1699 | 0 | out = |
1700 | 0 | HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); |
1701 | 0 | a1 = ROUND_POWER_OF_TWO(out, 5); |
1702 | 0 | for (j = 0; j < 8; ++j) { |
1703 | 0 | for (i = 0; i < 8; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
1704 | 0 | dest += stride; |
1705 | 0 | } |
1706 | 0 | } |
1707 | | |
1708 | 0 | void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1709 | 0 | tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
1710 | 0 | tran_high_t s9, s10, s11, s12, s13, s14, s15; |
1711 | 0 | tran_low_t x0 = input[15]; |
1712 | 0 | tran_low_t x1 = input[0]; |
1713 | 0 | tran_low_t x2 = input[13]; |
1714 | 0 | tran_low_t x3 = input[2]; |
1715 | 0 | tran_low_t x4 = input[11]; |
1716 | 0 | tran_low_t x5 = input[4]; |
1717 | 0 | tran_low_t x6 = input[9]; |
1718 | 0 | tran_low_t x7 = input[6]; |
1719 | 0 | tran_low_t x8 = input[7]; |
1720 | 0 | tran_low_t x9 = input[8]; |
1721 | 0 | tran_low_t x10 = input[5]; |
1722 | 0 | tran_low_t x11 = input[10]; |
1723 | 0 | tran_low_t x12 = input[3]; |
1724 | 0 | tran_low_t x13 = input[12]; |
1725 | 0 | tran_low_t x14 = input[1]; |
1726 | 0 | tran_low_t x15 = input[14]; |
1727 | 0 | (void)bd; |
1728 | |
|
1729 | 0 | if (detect_invalid_highbd_input(input, 16)) { |
1730 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1731 | | assert(0 && "invalid highbd txfm input"); |
1732 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1733 | 0 | memset(output, 0, sizeof(*output) * 16); |
1734 | 0 | return; |
1735 | 0 | } |
1736 | | |
1737 | 0 | if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | |
1738 | 0 | x13 | x14 | x15)) { |
1739 | 0 | memset(output, 0, 16 * sizeof(*output)); |
1740 | 0 | return; |
1741 | 0 | } |
1742 | | |
1743 | | // stage 1 |
1744 | 0 | s0 = x0 * (tran_high_t)cospi_1_64 + x1 * (tran_high_t)cospi_31_64; |
1745 | 0 | s1 = x0 * (tran_high_t)cospi_31_64 - x1 * (tran_high_t)cospi_1_64; |
1746 | 0 | s2 = x2 * (tran_high_t)cospi_5_64 + x3 * (tran_high_t)cospi_27_64; |
1747 | 0 | s3 = x2 * (tran_high_t)cospi_27_64 - x3 * (tran_high_t)cospi_5_64; |
1748 | 0 | s4 = x4 * (tran_high_t)cospi_9_64 + x5 * (tran_high_t)cospi_23_64; |
1749 | 0 | s5 = x4 * (tran_high_t)cospi_23_64 - x5 * (tran_high_t)cospi_9_64; |
1750 | 0 | s6 = x6 * (tran_high_t)cospi_13_64 + x7 * (tran_high_t)cospi_19_64; |
1751 | 0 | s7 = x6 * (tran_high_t)cospi_19_64 - x7 * (tran_high_t)cospi_13_64; |
1752 | 0 | s8 = x8 * (tran_high_t)cospi_17_64 + x9 * (tran_high_t)cospi_15_64; |
1753 | 0 | s9 = x8 * (tran_high_t)cospi_15_64 - x9 * (tran_high_t)cospi_17_64; |
1754 | 0 | s10 = x10 * (tran_high_t)cospi_21_64 + x11 * (tran_high_t)cospi_11_64; |
1755 | 0 | s11 = x10 * (tran_high_t)cospi_11_64 - x11 * (tran_high_t)cospi_21_64; |
1756 | 0 | s12 = x12 * (tran_high_t)cospi_25_64 + x13 * (tran_high_t)cospi_7_64; |
1757 | 0 | s13 = x12 * (tran_high_t)cospi_7_64 - x13 * (tran_high_t)cospi_25_64; |
1758 | 0 | s14 = x14 * (tran_high_t)cospi_29_64 + x15 * (tran_high_t)cospi_3_64; |
1759 | 0 | s15 = x14 * (tran_high_t)cospi_3_64 - x15 * (tran_high_t)cospi_29_64; |
1760 | |
|
1761 | 0 | x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s8), bd); |
1762 | 0 | x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s9), bd); |
1763 | 0 | x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s10), bd); |
1764 | 0 | x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s11), bd); |
1765 | 0 | x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s12), bd); |
1766 | 0 | x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s13), bd); |
1767 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 + s14), bd); |
1768 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 + s15), bd); |
1769 | 0 | x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s8), bd); |
1770 | 0 | x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s9), bd); |
1771 | 0 | x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s10), bd); |
1772 | 0 | x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s11), bd); |
1773 | 0 | x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s12), bd); |
1774 | 0 | x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s13), bd); |
1775 | 0 | x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 - s14), bd); |
1776 | 0 | x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 - s15), bd); |
1777 | | |
1778 | | // stage 2 |
1779 | 0 | s0 = x0; |
1780 | 0 | s1 = x1; |
1781 | 0 | s2 = x2; |
1782 | 0 | s3 = x3; |
1783 | 0 | s4 = x4; |
1784 | 0 | s5 = x5; |
1785 | 0 | s6 = x6; |
1786 | 0 | s7 = x7; |
1787 | 0 | s8 = x8 * (tran_high_t)cospi_4_64 + x9 * (tran_high_t)cospi_28_64; |
1788 | 0 | s9 = x8 * (tran_high_t)cospi_28_64 - x9 * (tran_high_t)cospi_4_64; |
1789 | 0 | s10 = x10 * (tran_high_t)cospi_20_64 + x11 * (tran_high_t)cospi_12_64; |
1790 | 0 | s11 = x10 * (tran_high_t)cospi_12_64 - x11 * (tran_high_t)cospi_20_64; |
1791 | 0 | s12 = -x12 * (tran_high_t)cospi_28_64 + x13 * (tran_high_t)cospi_4_64; |
1792 | 0 | s13 = x12 * (tran_high_t)cospi_4_64 + x13 * (tran_high_t)cospi_28_64; |
1793 | 0 | s14 = -x14 * (tran_high_t)cospi_12_64 + x15 * (tran_high_t)cospi_20_64; |
1794 | 0 | s15 = x14 * (tran_high_t)cospi_20_64 + x15 * (tran_high_t)cospi_12_64; |
1795 | |
|
1796 | 0 | x0 = HIGHBD_WRAPLOW(s0 + s4, bd); |
1797 | 0 | x1 = HIGHBD_WRAPLOW(s1 + s5, bd); |
1798 | 0 | x2 = HIGHBD_WRAPLOW(s2 + s6, bd); |
1799 | 0 | x3 = HIGHBD_WRAPLOW(s3 + s7, bd); |
1800 | 0 | x4 = HIGHBD_WRAPLOW(s0 - s4, bd); |
1801 | 0 | x5 = HIGHBD_WRAPLOW(s1 - s5, bd); |
1802 | 0 | x6 = HIGHBD_WRAPLOW(s2 - s6, bd); |
1803 | 0 | x7 = HIGHBD_WRAPLOW(s3 - s7, bd); |
1804 | 0 | x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 + s12), bd); |
1805 | 0 | x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 + s13), bd); |
1806 | 0 | x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 + s14), bd); |
1807 | 0 | x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 + s15), bd); |
1808 | 0 | x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 - s12), bd); |
1809 | 0 | x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 - s13), bd); |
1810 | 0 | x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 - s14), bd); |
1811 | 0 | x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 - s15), bd); |
1812 | | |
1813 | | // stage 3 |
1814 | 0 | s0 = x0; |
1815 | 0 | s1 = x1; |
1816 | 0 | s2 = x2; |
1817 | 0 | s3 = x3; |
1818 | 0 | s4 = x4 * (tran_high_t)cospi_8_64 + x5 * (tran_high_t)cospi_24_64; |
1819 | 0 | s5 = x4 * (tran_high_t)cospi_24_64 - x5 * (tran_high_t)cospi_8_64; |
1820 | 0 | s6 = -x6 * (tran_high_t)cospi_24_64 + x7 * (tran_high_t)cospi_8_64; |
1821 | 0 | s7 = x6 * (tran_high_t)cospi_8_64 + x7 * (tran_high_t)cospi_24_64; |
1822 | 0 | s8 = x8; |
1823 | 0 | s9 = x9; |
1824 | 0 | s10 = x10; |
1825 | 0 | s11 = x11; |
1826 | 0 | s12 = x12 * (tran_high_t)cospi_8_64 + x13 * (tran_high_t)cospi_24_64; |
1827 | 0 | s13 = x12 * (tran_high_t)cospi_24_64 - x13 * (tran_high_t)cospi_8_64; |
1828 | 0 | s14 = -x14 * (tran_high_t)cospi_24_64 + x15 * (tran_high_t)cospi_8_64; |
1829 | 0 | s15 = x14 * (tran_high_t)cospi_8_64 + x15 * (tran_high_t)cospi_24_64; |
1830 | |
|
1831 | 0 | x0 = HIGHBD_WRAPLOW(s0 + s2, bd); |
1832 | 0 | x1 = HIGHBD_WRAPLOW(s1 + s3, bd); |
1833 | 0 | x2 = HIGHBD_WRAPLOW(s0 - s2, bd); |
1834 | 0 | x3 = HIGHBD_WRAPLOW(s1 - s3, bd); |
1835 | 0 | x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd); |
1836 | 0 | x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd); |
1837 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd); |
1838 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd); |
1839 | 0 | x8 = HIGHBD_WRAPLOW(s8 + s10, bd); |
1840 | 0 | x9 = HIGHBD_WRAPLOW(s9 + s11, bd); |
1841 | 0 | x10 = HIGHBD_WRAPLOW(s8 - s10, bd); |
1842 | 0 | x11 = HIGHBD_WRAPLOW(s9 - s11, bd); |
1843 | 0 | x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 + s14), bd); |
1844 | 0 | x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 + s15), bd); |
1845 | 0 | x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 - s14), bd); |
1846 | 0 | x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 - s15), bd); |
1847 | | |
1848 | | // stage 4 |
1849 | 0 | s2 = (tran_high_t)(-cospi_16_64) * (x2 + x3); |
1850 | 0 | s3 = (tran_high_t)cospi_16_64 * (x2 - x3); |
1851 | 0 | s6 = (tran_high_t)cospi_16_64 * (x6 + x7); |
1852 | 0 | s7 = (tran_high_t)cospi_16_64 * (-x6 + x7); |
1853 | 0 | s10 = (tran_high_t)cospi_16_64 * (x10 + x11); |
1854 | 0 | s11 = (tran_high_t)cospi_16_64 * (-x10 + x11); |
1855 | 0 | s14 = (tran_high_t)(-cospi_16_64) * (x14 + x15); |
1856 | 0 | s15 = (tran_high_t)cospi_16_64 * (x14 - x15); |
1857 | |
|
1858 | 0 | x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); |
1859 | 0 | x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd); |
1860 | 0 | x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd); |
1861 | 0 | x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd); |
1862 | 0 | x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10), bd); |
1863 | 0 | x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11), bd); |
1864 | 0 | x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s14), bd); |
1865 | 0 | x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s15), bd); |
1866 | |
|
1867 | 0 | output[0] = HIGHBD_WRAPLOW(x0, bd); |
1868 | 0 | output[1] = HIGHBD_WRAPLOW(-x8, bd); |
1869 | 0 | output[2] = HIGHBD_WRAPLOW(x12, bd); |
1870 | 0 | output[3] = HIGHBD_WRAPLOW(-x4, bd); |
1871 | 0 | output[4] = HIGHBD_WRAPLOW(x6, bd); |
1872 | 0 | output[5] = HIGHBD_WRAPLOW(x14, bd); |
1873 | 0 | output[6] = HIGHBD_WRAPLOW(x10, bd); |
1874 | 0 | output[7] = HIGHBD_WRAPLOW(x2, bd); |
1875 | 0 | output[8] = HIGHBD_WRAPLOW(x3, bd); |
1876 | 0 | output[9] = HIGHBD_WRAPLOW(x11, bd); |
1877 | 0 | output[10] = HIGHBD_WRAPLOW(x15, bd); |
1878 | 0 | output[11] = HIGHBD_WRAPLOW(x7, bd); |
1879 | 0 | output[12] = HIGHBD_WRAPLOW(x5, bd); |
1880 | 0 | output[13] = HIGHBD_WRAPLOW(-x13, bd); |
1881 | 0 | output[14] = HIGHBD_WRAPLOW(x9, bd); |
1882 | 0 | output[15] = HIGHBD_WRAPLOW(-x1, bd); |
1883 | 0 | } |
1884 | | |
1885 | 0 | void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1886 | 0 | tran_low_t step1[16], step2[16]; |
1887 | 0 | tran_high_t temp1, temp2; |
1888 | 0 | (void)bd; |
1889 | |
|
1890 | 0 | if (detect_invalid_highbd_input(input, 16)) { |
1891 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
1892 | | assert(0 && "invalid highbd txfm input"); |
1893 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
1894 | 0 | memset(output, 0, sizeof(*output) * 16); |
1895 | 0 | return; |
1896 | 0 | } |
1897 | | |
1898 | | // stage 1 |
1899 | 0 | step1[0] = input[0 / 2]; |
1900 | 0 | step1[1] = input[16 / 2]; |
1901 | 0 | step1[2] = input[8 / 2]; |
1902 | 0 | step1[3] = input[24 / 2]; |
1903 | 0 | step1[4] = input[4 / 2]; |
1904 | 0 | step1[5] = input[20 / 2]; |
1905 | 0 | step1[6] = input[12 / 2]; |
1906 | 0 | step1[7] = input[28 / 2]; |
1907 | 0 | step1[8] = input[2 / 2]; |
1908 | 0 | step1[9] = input[18 / 2]; |
1909 | 0 | step1[10] = input[10 / 2]; |
1910 | 0 | step1[11] = input[26 / 2]; |
1911 | 0 | step1[12] = input[6 / 2]; |
1912 | 0 | step1[13] = input[22 / 2]; |
1913 | 0 | step1[14] = input[14 / 2]; |
1914 | 0 | step1[15] = input[30 / 2]; |
1915 | | |
1916 | | // stage 2 |
1917 | 0 | step2[0] = step1[0]; |
1918 | 0 | step2[1] = step1[1]; |
1919 | 0 | step2[2] = step1[2]; |
1920 | 0 | step2[3] = step1[3]; |
1921 | 0 | step2[4] = step1[4]; |
1922 | 0 | step2[5] = step1[5]; |
1923 | 0 | step2[6] = step1[6]; |
1924 | 0 | step2[7] = step1[7]; |
1925 | |
|
1926 | 0 | temp1 = |
1927 | 0 | step1[8] * (tran_high_t)cospi_30_64 - step1[15] * (tran_high_t)cospi_2_64; |
1928 | 0 | temp2 = |
1929 | 0 | step1[8] * (tran_high_t)cospi_2_64 + step1[15] * (tran_high_t)cospi_30_64; |
1930 | 0 | step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1931 | 0 | step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1932 | |
|
1933 | 0 | temp1 = step1[9] * (tran_high_t)cospi_14_64 - |
1934 | 0 | step1[14] * (tran_high_t)cospi_18_64; |
1935 | 0 | temp2 = step1[9] * (tran_high_t)cospi_18_64 + |
1936 | 0 | step1[14] * (tran_high_t)cospi_14_64; |
1937 | 0 | step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1938 | 0 | step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1939 | |
|
1940 | 0 | temp1 = step1[10] * (tran_high_t)cospi_22_64 - |
1941 | 0 | step1[13] * (tran_high_t)cospi_10_64; |
1942 | 0 | temp2 = step1[10] * (tran_high_t)cospi_10_64 + |
1943 | 0 | step1[13] * (tran_high_t)cospi_22_64; |
1944 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1945 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1946 | |
|
1947 | 0 | temp1 = step1[11] * (tran_high_t)cospi_6_64 - |
1948 | 0 | step1[12] * (tran_high_t)cospi_26_64; |
1949 | 0 | temp2 = step1[11] * (tran_high_t)cospi_26_64 + |
1950 | 0 | step1[12] * (tran_high_t)cospi_6_64; |
1951 | 0 | step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1952 | 0 | step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1953 | | |
1954 | | // stage 3 |
1955 | 0 | step1[0] = step2[0]; |
1956 | 0 | step1[1] = step2[1]; |
1957 | 0 | step1[2] = step2[2]; |
1958 | 0 | step1[3] = step2[3]; |
1959 | |
|
1960 | 0 | temp1 = |
1961 | 0 | step2[4] * (tran_high_t)cospi_28_64 - step2[7] * (tran_high_t)cospi_4_64; |
1962 | 0 | temp2 = |
1963 | 0 | step2[4] * (tran_high_t)cospi_4_64 + step2[7] * (tran_high_t)cospi_28_64; |
1964 | 0 | step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1965 | 0 | step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1966 | 0 | temp1 = |
1967 | 0 | step2[5] * (tran_high_t)cospi_12_64 - step2[6] * (tran_high_t)cospi_20_64; |
1968 | 0 | temp2 = |
1969 | 0 | step2[5] * (tran_high_t)cospi_20_64 + step2[6] * (tran_high_t)cospi_12_64; |
1970 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1971 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1972 | |
|
1973 | 0 | step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); |
1974 | 0 | step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); |
1975 | 0 | step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); |
1976 | 0 | step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); |
1977 | 0 | step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); |
1978 | 0 | step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); |
1979 | 0 | step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); |
1980 | 0 | step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); |
1981 | | |
1982 | | // stage 4 |
1983 | 0 | temp1 = (step1[0] + step1[1]) * (tran_high_t)cospi_16_64; |
1984 | 0 | temp2 = (step1[0] - step1[1]) * (tran_high_t)cospi_16_64; |
1985 | 0 | step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1986 | 0 | step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1987 | 0 | temp1 = |
1988 | 0 | step1[2] * (tran_high_t)cospi_24_64 - step1[3] * (tran_high_t)cospi_8_64; |
1989 | 0 | temp2 = |
1990 | 0 | step1[2] * (tran_high_t)cospi_8_64 + step1[3] * (tran_high_t)cospi_24_64; |
1991 | 0 | step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
1992 | 0 | step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
1993 | 0 | step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); |
1994 | 0 | step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); |
1995 | 0 | step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); |
1996 | 0 | step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); |
1997 | |
|
1998 | 0 | step2[8] = step1[8]; |
1999 | 0 | step2[15] = step1[15]; |
2000 | 0 | temp1 = -step1[9] * (tran_high_t)cospi_8_64 + |
2001 | 0 | step1[14] * (tran_high_t)cospi_24_64; |
2002 | 0 | temp2 = |
2003 | 0 | step1[9] * (tran_high_t)cospi_24_64 + step1[14] * (tran_high_t)cospi_8_64; |
2004 | 0 | step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2005 | 0 | step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2006 | 0 | temp1 = -step1[10] * (tran_high_t)cospi_24_64 - |
2007 | 0 | step1[13] * (tran_high_t)cospi_8_64; |
2008 | 0 | temp2 = -step1[10] * (tran_high_t)cospi_8_64 + |
2009 | 0 | step1[13] * (tran_high_t)cospi_24_64; |
2010 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2011 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2012 | 0 | step2[11] = step1[11]; |
2013 | 0 | step2[12] = step1[12]; |
2014 | | |
2015 | | // stage 5 |
2016 | 0 | step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); |
2017 | 0 | step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); |
2018 | 0 | step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); |
2019 | 0 | step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); |
2020 | 0 | step1[4] = step2[4]; |
2021 | 0 | temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; |
2022 | 0 | temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; |
2023 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2024 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2025 | 0 | step1[7] = step2[7]; |
2026 | |
|
2027 | 0 | step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); |
2028 | 0 | step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); |
2029 | 0 | step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); |
2030 | 0 | step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); |
2031 | 0 | step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); |
2032 | 0 | step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); |
2033 | 0 | step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); |
2034 | 0 | step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); |
2035 | | |
2036 | | // stage 6 |
2037 | 0 | step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); |
2038 | 0 | step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); |
2039 | 0 | step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); |
2040 | 0 | step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); |
2041 | 0 | step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); |
2042 | 0 | step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); |
2043 | 0 | step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); |
2044 | 0 | step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); |
2045 | 0 | step2[8] = step1[8]; |
2046 | 0 | step2[9] = step1[9]; |
2047 | 0 | temp1 = (-step1[10] + step1[13]) * (tran_high_t)cospi_16_64; |
2048 | 0 | temp2 = (step1[10] + step1[13]) * (tran_high_t)cospi_16_64; |
2049 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2050 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2051 | 0 | temp1 = (-step1[11] + step1[12]) * (tran_high_t)cospi_16_64; |
2052 | 0 | temp2 = (step1[11] + step1[12]) * (tran_high_t)cospi_16_64; |
2053 | 0 | step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2054 | 0 | step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2055 | 0 | step2[14] = step1[14]; |
2056 | 0 | step2[15] = step1[15]; |
2057 | | |
2058 | | // stage 7 |
2059 | 0 | output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); |
2060 | 0 | output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); |
2061 | 0 | output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); |
2062 | 0 | output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); |
2063 | 0 | output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); |
2064 | 0 | output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); |
2065 | 0 | output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); |
2066 | 0 | output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); |
2067 | 0 | output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); |
2068 | 0 | output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); |
2069 | 0 | output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); |
2070 | 0 | output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); |
2071 | 0 | output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); |
2072 | 0 | output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); |
2073 | 0 | output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); |
2074 | 0 | output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); |
2075 | 0 | } |
2076 | | |
2077 | | void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint16_t *dest, |
2078 | 0 | int stride, int bd) { |
2079 | 0 | int i, j; |
2080 | 0 | tran_low_t out[16 * 16]; |
2081 | 0 | tran_low_t *outptr = out; |
2082 | 0 | tran_low_t temp_in[16], temp_out[16]; |
2083 | | |
2084 | | // First transform rows |
2085 | 0 | for (i = 0; i < 16; ++i) { |
2086 | 0 | vpx_highbd_idct16_c(input, outptr, bd); |
2087 | 0 | input += 16; |
2088 | 0 | outptr += 16; |
2089 | 0 | } |
2090 | | |
2091 | | // Then transform columns |
2092 | 0 | for (i = 0; i < 16; ++i) { |
2093 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
2094 | 0 | vpx_highbd_idct16_c(temp_in, temp_out, bd); |
2095 | 0 | for (j = 0; j < 16; ++j) { |
2096 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
2097 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2098 | 0 | } |
2099 | 0 | } |
2100 | 0 | } |
2101 | | |
2102 | | void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint16_t *dest, |
2103 | 0 | int stride, int bd) { |
2104 | 0 | int i, j; |
2105 | 0 | tran_low_t out[16 * 16] = { 0 }; |
2106 | 0 | tran_low_t *outptr = out; |
2107 | 0 | tran_low_t temp_in[16], temp_out[16]; |
2108 | | |
2109 | | // First transform rows. Since all non-zero dct coefficients are in |
2110 | | // upper-left 8x8 area, we only need to calculate first 8 rows here. |
2111 | 0 | for (i = 0; i < 8; ++i) { |
2112 | 0 | vpx_highbd_idct16_c(input, outptr, bd); |
2113 | 0 | input += 16; |
2114 | 0 | outptr += 16; |
2115 | 0 | } |
2116 | | |
2117 | | // Then transform columns |
2118 | 0 | for (i = 0; i < 16; ++i) { |
2119 | 0 | uint16_t *destT = dest; |
2120 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
2121 | 0 | vpx_highbd_idct16_c(temp_in, temp_out, bd); |
2122 | 0 | for (j = 0; j < 16; ++j) { |
2123 | 0 | destT[i] = highbd_clip_pixel_add(destT[i], |
2124 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2125 | 0 | destT += stride; |
2126 | 0 | } |
2127 | 0 | } |
2128 | 0 | } |
2129 | | |
2130 | | void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint16_t *dest, |
2131 | 0 | int stride, int bd) { |
2132 | 0 | int i, j; |
2133 | 0 | tran_low_t out[16 * 16] = { 0 }; |
2134 | 0 | tran_low_t *outptr = out; |
2135 | 0 | tran_low_t temp_in[16], temp_out[16]; |
2136 | | |
2137 | | // First transform rows. Since all non-zero dct coefficients are in |
2138 | | // upper-left 4x4 area, we only need to calculate first 4 rows here. |
2139 | 0 | for (i = 0; i < 4; ++i) { |
2140 | 0 | vpx_highbd_idct16_c(input, outptr, bd); |
2141 | 0 | input += 16; |
2142 | 0 | outptr += 16; |
2143 | 0 | } |
2144 | | |
2145 | | // Then transform columns |
2146 | 0 | for (i = 0; i < 16; ++i) { |
2147 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
2148 | 0 | vpx_highbd_idct16_c(temp_in, temp_out, bd); |
2149 | 0 | for (j = 0; j < 16; ++j) { |
2150 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
2151 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2152 | 0 | } |
2153 | 0 | } |
2154 | 0 | } |
2155 | | |
2156 | | void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint16_t *dest, |
2157 | 0 | int stride, int bd) { |
2158 | 0 | int i, j; |
2159 | 0 | tran_high_t a1; |
2160 | 0 | tran_low_t out = HIGHBD_WRAPLOW( |
2161 | 0 | dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); |
2162 | |
|
2163 | 0 | out = |
2164 | 0 | HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); |
2165 | 0 | a1 = ROUND_POWER_OF_TWO(out, 6); |
2166 | 0 | for (j = 0; j < 16; ++j) { |
2167 | 0 | for (i = 0; i < 16; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2168 | 0 | dest += stride; |
2169 | 0 | } |
2170 | 0 | } |
2171 | | |
2172 | | static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, |
2173 | 0 | int bd) { |
2174 | 0 | tran_low_t step1[32], step2[32]; |
2175 | 0 | tran_high_t temp1, temp2; |
2176 | 0 | (void)bd; |
2177 | |
|
2178 | 0 | if (detect_invalid_highbd_input(input, 32)) { |
2179 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
2180 | | assert(0 && "invalid highbd txfm input"); |
2181 | | #endif // CONFIG_COEFFICIENT_RANGE_CHECKING |
2182 | 0 | memset(output, 0, sizeof(*output) * 32); |
2183 | 0 | return; |
2184 | 0 | } |
2185 | | |
2186 | | // stage 1 |
2187 | 0 | step1[0] = input[0]; |
2188 | 0 | step1[1] = input[16]; |
2189 | 0 | step1[2] = input[8]; |
2190 | 0 | step1[3] = input[24]; |
2191 | 0 | step1[4] = input[4]; |
2192 | 0 | step1[5] = input[20]; |
2193 | 0 | step1[6] = input[12]; |
2194 | 0 | step1[7] = input[28]; |
2195 | 0 | step1[8] = input[2]; |
2196 | 0 | step1[9] = input[18]; |
2197 | 0 | step1[10] = input[10]; |
2198 | 0 | step1[11] = input[26]; |
2199 | 0 | step1[12] = input[6]; |
2200 | 0 | step1[13] = input[22]; |
2201 | 0 | step1[14] = input[14]; |
2202 | 0 | step1[15] = input[30]; |
2203 | |
|
2204 | 0 | temp1 = |
2205 | 0 | input[1] * (tran_high_t)cospi_31_64 - input[31] * (tran_high_t)cospi_1_64; |
2206 | 0 | temp2 = |
2207 | 0 | input[1] * (tran_high_t)cospi_1_64 + input[31] * (tran_high_t)cospi_31_64; |
2208 | 0 | step1[16] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2209 | 0 | step1[31] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2210 | |
|
2211 | 0 | temp1 = input[17] * (tran_high_t)cospi_15_64 - |
2212 | 0 | input[15] * (tran_high_t)cospi_17_64; |
2213 | 0 | temp2 = input[17] * (tran_high_t)cospi_17_64 + |
2214 | 0 | input[15] * (tran_high_t)cospi_15_64; |
2215 | 0 | step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2216 | 0 | step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2217 | |
|
2218 | 0 | temp1 = |
2219 | 0 | input[9] * (tran_high_t)cospi_23_64 - input[23] * (tran_high_t)cospi_9_64; |
2220 | 0 | temp2 = |
2221 | 0 | input[9] * (tran_high_t)cospi_9_64 + input[23] * (tran_high_t)cospi_23_64; |
2222 | 0 | step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2223 | 0 | step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2224 | |
|
2225 | 0 | temp1 = |
2226 | 0 | input[25] * (tran_high_t)cospi_7_64 - input[7] * (tran_high_t)cospi_25_64; |
2227 | 0 | temp2 = |
2228 | 0 | input[25] * (tran_high_t)cospi_25_64 + input[7] * (tran_high_t)cospi_7_64; |
2229 | 0 | step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2230 | 0 | step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2231 | |
|
2232 | 0 | temp1 = |
2233 | 0 | input[5] * (tran_high_t)cospi_27_64 - input[27] * (tran_high_t)cospi_5_64; |
2234 | 0 | temp2 = |
2235 | 0 | input[5] * (tran_high_t)cospi_5_64 + input[27] * (tran_high_t)cospi_27_64; |
2236 | 0 | step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2237 | 0 | step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2238 | |
|
2239 | 0 | temp1 = input[21] * (tran_high_t)cospi_11_64 - |
2240 | 0 | input[11] * (tran_high_t)cospi_21_64; |
2241 | 0 | temp2 = input[21] * (tran_high_t)cospi_21_64 + |
2242 | 0 | input[11] * (tran_high_t)cospi_11_64; |
2243 | 0 | step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2244 | 0 | step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2245 | |
|
2246 | 0 | temp1 = input[13] * (tran_high_t)cospi_19_64 - |
2247 | 0 | input[19] * (tran_high_t)cospi_13_64; |
2248 | 0 | temp2 = input[13] * (tran_high_t)cospi_13_64 + |
2249 | 0 | input[19] * (tran_high_t)cospi_19_64; |
2250 | 0 | step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2251 | 0 | step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2252 | |
|
2253 | 0 | temp1 = |
2254 | 0 | input[29] * (tran_high_t)cospi_3_64 - input[3] * (tran_high_t)cospi_29_64; |
2255 | 0 | temp2 = |
2256 | 0 | input[29] * (tran_high_t)cospi_29_64 + input[3] * (tran_high_t)cospi_3_64; |
2257 | 0 | step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2258 | 0 | step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2259 | | |
2260 | | // stage 2 |
2261 | 0 | step2[0] = step1[0]; |
2262 | 0 | step2[1] = step1[1]; |
2263 | 0 | step2[2] = step1[2]; |
2264 | 0 | step2[3] = step1[3]; |
2265 | 0 | step2[4] = step1[4]; |
2266 | 0 | step2[5] = step1[5]; |
2267 | 0 | step2[6] = step1[6]; |
2268 | 0 | step2[7] = step1[7]; |
2269 | |
|
2270 | 0 | temp1 = |
2271 | 0 | step1[8] * (tran_high_t)cospi_30_64 - step1[15] * (tran_high_t)cospi_2_64; |
2272 | 0 | temp2 = |
2273 | 0 | step1[8] * (tran_high_t)cospi_2_64 + step1[15] * (tran_high_t)cospi_30_64; |
2274 | 0 | step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2275 | 0 | step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2276 | |
|
2277 | 0 | temp1 = step1[9] * (tran_high_t)cospi_14_64 - |
2278 | 0 | step1[14] * (tran_high_t)cospi_18_64; |
2279 | 0 | temp2 = step1[9] * (tran_high_t)cospi_18_64 + |
2280 | 0 | step1[14] * (tran_high_t)cospi_14_64; |
2281 | 0 | step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2282 | 0 | step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2283 | |
|
2284 | 0 | temp1 = step1[10] * (tran_high_t)cospi_22_64 - |
2285 | 0 | step1[13] * (tran_high_t)cospi_10_64; |
2286 | 0 | temp2 = step1[10] * (tran_high_t)cospi_10_64 + |
2287 | 0 | step1[13] * (tran_high_t)cospi_22_64; |
2288 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2289 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2290 | |
|
2291 | 0 | temp1 = step1[11] * (tran_high_t)cospi_6_64 - |
2292 | 0 | step1[12] * (tran_high_t)cospi_26_64; |
2293 | 0 | temp2 = step1[11] * (tran_high_t)cospi_26_64 + |
2294 | 0 | step1[12] * (tran_high_t)cospi_6_64; |
2295 | 0 | step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2296 | 0 | step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2297 | |
|
2298 | 0 | step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); |
2299 | 0 | step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); |
2300 | 0 | step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); |
2301 | 0 | step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); |
2302 | 0 | step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); |
2303 | 0 | step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); |
2304 | 0 | step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); |
2305 | 0 | step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); |
2306 | 0 | step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); |
2307 | 0 | step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); |
2308 | 0 | step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); |
2309 | 0 | step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); |
2310 | 0 | step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); |
2311 | 0 | step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); |
2312 | 0 | step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); |
2313 | 0 | step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); |
2314 | | |
2315 | | // stage 3 |
2316 | 0 | step1[0] = step2[0]; |
2317 | 0 | step1[1] = step2[1]; |
2318 | 0 | step1[2] = step2[2]; |
2319 | 0 | step1[3] = step2[3]; |
2320 | |
|
2321 | 0 | temp1 = |
2322 | 0 | step2[4] * (tran_high_t)cospi_28_64 - step2[7] * (tran_high_t)cospi_4_64; |
2323 | 0 | temp2 = |
2324 | 0 | step2[4] * (tran_high_t)cospi_4_64 + step2[7] * (tran_high_t)cospi_28_64; |
2325 | 0 | step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2326 | 0 | step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2327 | 0 | temp1 = |
2328 | 0 | step2[5] * (tran_high_t)cospi_12_64 - step2[6] * (tran_high_t)cospi_20_64; |
2329 | 0 | temp2 = |
2330 | 0 | step2[5] * (tran_high_t)cospi_20_64 + step2[6] * (tran_high_t)cospi_12_64; |
2331 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2332 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2333 | |
|
2334 | 0 | step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); |
2335 | 0 | step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); |
2336 | 0 | step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); |
2337 | 0 | step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); |
2338 | 0 | step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); |
2339 | 0 | step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); |
2340 | 0 | step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); |
2341 | 0 | step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); |
2342 | |
|
2343 | 0 | step1[16] = step2[16]; |
2344 | 0 | step1[31] = step2[31]; |
2345 | 0 | temp1 = -step2[17] * (tran_high_t)cospi_4_64 + |
2346 | 0 | step2[30] * (tran_high_t)cospi_28_64; |
2347 | 0 | temp2 = step2[17] * (tran_high_t)cospi_28_64 + |
2348 | 0 | step2[30] * (tran_high_t)cospi_4_64; |
2349 | 0 | step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2350 | 0 | step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2351 | 0 | temp1 = -step2[18] * (tran_high_t)cospi_28_64 - |
2352 | 0 | step2[29] * (tran_high_t)cospi_4_64; |
2353 | 0 | temp2 = -step2[18] * (tran_high_t)cospi_4_64 + |
2354 | 0 | step2[29] * (tran_high_t)cospi_28_64; |
2355 | 0 | step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2356 | 0 | step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2357 | 0 | step1[19] = step2[19]; |
2358 | 0 | step1[20] = step2[20]; |
2359 | 0 | temp1 = -step2[21] * (tran_high_t)cospi_20_64 + |
2360 | 0 | step2[26] * (tran_high_t)cospi_12_64; |
2361 | 0 | temp2 = step2[21] * (tran_high_t)cospi_12_64 + |
2362 | 0 | step2[26] * (tran_high_t)cospi_20_64; |
2363 | 0 | step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2364 | 0 | step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2365 | 0 | temp1 = -step2[22] * (tran_high_t)cospi_12_64 - |
2366 | 0 | step2[25] * (tran_high_t)cospi_20_64; |
2367 | 0 | temp2 = -step2[22] * (tran_high_t)cospi_20_64 + |
2368 | 0 | step2[25] * (tran_high_t)cospi_12_64; |
2369 | 0 | step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2370 | 0 | step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2371 | 0 | step1[23] = step2[23]; |
2372 | 0 | step1[24] = step2[24]; |
2373 | 0 | step1[27] = step2[27]; |
2374 | 0 | step1[28] = step2[28]; |
2375 | | |
2376 | | // stage 4 |
2377 | 0 | temp1 = (step1[0] + step1[1]) * (tran_high_t)cospi_16_64; |
2378 | 0 | temp2 = (step1[0] - step1[1]) * (tran_high_t)cospi_16_64; |
2379 | 0 | step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2380 | 0 | step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2381 | 0 | temp1 = |
2382 | 0 | step1[2] * (tran_high_t)cospi_24_64 - step1[3] * (tran_high_t)cospi_8_64; |
2383 | 0 | temp2 = |
2384 | 0 | step1[2] * (tran_high_t)cospi_8_64 + step1[3] * (tran_high_t)cospi_24_64; |
2385 | 0 | step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2386 | 0 | step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2387 | 0 | step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); |
2388 | 0 | step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); |
2389 | 0 | step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); |
2390 | 0 | step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); |
2391 | |
|
2392 | 0 | step2[8] = step1[8]; |
2393 | 0 | step2[15] = step1[15]; |
2394 | 0 | temp1 = -step1[9] * (tran_high_t)cospi_8_64 + |
2395 | 0 | step1[14] * (tran_high_t)cospi_24_64; |
2396 | 0 | temp2 = |
2397 | 0 | step1[9] * (tran_high_t)cospi_24_64 + step1[14] * (tran_high_t)cospi_8_64; |
2398 | 0 | step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2399 | 0 | step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2400 | 0 | temp1 = -step1[10] * (tran_high_t)cospi_24_64 - |
2401 | 0 | step1[13] * (tran_high_t)cospi_8_64; |
2402 | 0 | temp2 = -step1[10] * (tran_high_t)cospi_8_64 + |
2403 | 0 | step1[13] * (tran_high_t)cospi_24_64; |
2404 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2405 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2406 | 0 | step2[11] = step1[11]; |
2407 | 0 | step2[12] = step1[12]; |
2408 | |
|
2409 | 0 | step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); |
2410 | 0 | step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); |
2411 | 0 | step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); |
2412 | 0 | step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); |
2413 | 0 | step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); |
2414 | 0 | step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); |
2415 | 0 | step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); |
2416 | 0 | step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); |
2417 | |
|
2418 | 0 | step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); |
2419 | 0 | step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); |
2420 | 0 | step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); |
2421 | 0 | step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); |
2422 | 0 | step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); |
2423 | 0 | step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); |
2424 | 0 | step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); |
2425 | 0 | step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); |
2426 | | |
2427 | | // stage 5 |
2428 | 0 | step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); |
2429 | 0 | step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); |
2430 | 0 | step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); |
2431 | 0 | step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); |
2432 | 0 | step1[4] = step2[4]; |
2433 | 0 | temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; |
2434 | 0 | temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; |
2435 | 0 | step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2436 | 0 | step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2437 | 0 | step1[7] = step2[7]; |
2438 | |
|
2439 | 0 | step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); |
2440 | 0 | step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); |
2441 | 0 | step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); |
2442 | 0 | step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); |
2443 | 0 | step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); |
2444 | 0 | step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); |
2445 | 0 | step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); |
2446 | 0 | step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); |
2447 | |
|
2448 | 0 | step1[16] = step2[16]; |
2449 | 0 | step1[17] = step2[17]; |
2450 | 0 | temp1 = -step2[18] * (tran_high_t)cospi_8_64 + |
2451 | 0 | step2[29] * (tran_high_t)cospi_24_64; |
2452 | 0 | temp2 = step2[18] * (tran_high_t)cospi_24_64 + |
2453 | 0 | step2[29] * (tran_high_t)cospi_8_64; |
2454 | 0 | step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2455 | 0 | step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2456 | 0 | temp1 = -step2[19] * (tran_high_t)cospi_8_64 + |
2457 | 0 | step2[28] * (tran_high_t)cospi_24_64; |
2458 | 0 | temp2 = step2[19] * (tran_high_t)cospi_24_64 + |
2459 | 0 | step2[28] * (tran_high_t)cospi_8_64; |
2460 | 0 | step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2461 | 0 | step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2462 | 0 | temp1 = -step2[20] * (tran_high_t)cospi_24_64 - |
2463 | 0 | step2[27] * (tran_high_t)cospi_8_64; |
2464 | 0 | temp2 = -step2[20] * (tran_high_t)cospi_8_64 + |
2465 | 0 | step2[27] * (tran_high_t)cospi_24_64; |
2466 | 0 | step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2467 | 0 | step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2468 | 0 | temp1 = -step2[21] * (tran_high_t)cospi_24_64 - |
2469 | 0 | step2[26] * (tran_high_t)cospi_8_64; |
2470 | 0 | temp2 = -step2[21] * (tran_high_t)cospi_8_64 + |
2471 | 0 | step2[26] * (tran_high_t)cospi_24_64; |
2472 | 0 | step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2473 | 0 | step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2474 | 0 | step1[22] = step2[22]; |
2475 | 0 | step1[23] = step2[23]; |
2476 | 0 | step1[24] = step2[24]; |
2477 | 0 | step1[25] = step2[25]; |
2478 | 0 | step1[30] = step2[30]; |
2479 | 0 | step1[31] = step2[31]; |
2480 | | |
2481 | | // stage 6 |
2482 | 0 | step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); |
2483 | 0 | step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); |
2484 | 0 | step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); |
2485 | 0 | step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); |
2486 | 0 | step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); |
2487 | 0 | step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); |
2488 | 0 | step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); |
2489 | 0 | step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); |
2490 | 0 | step2[8] = step1[8]; |
2491 | 0 | step2[9] = step1[9]; |
2492 | 0 | temp1 = (-step1[10] + step1[13]) * (tran_high_t)cospi_16_64; |
2493 | 0 | temp2 = (step1[10] + step1[13]) * (tran_high_t)cospi_16_64; |
2494 | 0 | step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2495 | 0 | step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2496 | 0 | temp1 = (-step1[11] + step1[12]) * (tran_high_t)cospi_16_64; |
2497 | 0 | temp2 = (step1[11] + step1[12]) * (tran_high_t)cospi_16_64; |
2498 | 0 | step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2499 | 0 | step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2500 | 0 | step2[14] = step1[14]; |
2501 | 0 | step2[15] = step1[15]; |
2502 | |
|
2503 | 0 | step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); |
2504 | 0 | step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); |
2505 | 0 | step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); |
2506 | 0 | step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); |
2507 | 0 | step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); |
2508 | 0 | step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); |
2509 | 0 | step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); |
2510 | 0 | step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); |
2511 | |
|
2512 | 0 | step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); |
2513 | 0 | step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); |
2514 | 0 | step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); |
2515 | 0 | step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); |
2516 | 0 | step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); |
2517 | 0 | step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); |
2518 | 0 | step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); |
2519 | 0 | step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); |
2520 | | |
2521 | | // stage 7 |
2522 | 0 | step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); |
2523 | 0 | step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); |
2524 | 0 | step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); |
2525 | 0 | step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); |
2526 | 0 | step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); |
2527 | 0 | step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); |
2528 | 0 | step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); |
2529 | 0 | step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); |
2530 | 0 | step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); |
2531 | 0 | step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); |
2532 | 0 | step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); |
2533 | 0 | step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); |
2534 | 0 | step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); |
2535 | 0 | step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); |
2536 | 0 | step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); |
2537 | 0 | step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); |
2538 | |
|
2539 | 0 | step1[16] = step2[16]; |
2540 | 0 | step1[17] = step2[17]; |
2541 | 0 | step1[18] = step2[18]; |
2542 | 0 | step1[19] = step2[19]; |
2543 | 0 | temp1 = (-step2[20] + step2[27]) * (tran_high_t)cospi_16_64; |
2544 | 0 | temp2 = (step2[20] + step2[27]) * (tran_high_t)cospi_16_64; |
2545 | 0 | step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2546 | 0 | step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2547 | 0 | temp1 = (-step2[21] + step2[26]) * (tran_high_t)cospi_16_64; |
2548 | 0 | temp2 = (step2[21] + step2[26]) * (tran_high_t)cospi_16_64; |
2549 | 0 | step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2550 | 0 | step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2551 | 0 | temp1 = (-step2[22] + step2[25]) * (tran_high_t)cospi_16_64; |
2552 | 0 | temp2 = (step2[22] + step2[25]) * (tran_high_t)cospi_16_64; |
2553 | 0 | step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2554 | 0 | step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2555 | 0 | temp1 = (-step2[23] + step2[24]) * (tran_high_t)cospi_16_64; |
2556 | 0 | temp2 = (step2[23] + step2[24]) * (tran_high_t)cospi_16_64; |
2557 | 0 | step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); |
2558 | 0 | step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); |
2559 | 0 | step1[28] = step2[28]; |
2560 | 0 | step1[29] = step2[29]; |
2561 | 0 | step1[30] = step2[30]; |
2562 | 0 | step1[31] = step2[31]; |
2563 | | |
2564 | | // final stage |
2565 | 0 | output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); |
2566 | 0 | output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); |
2567 | 0 | output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); |
2568 | 0 | output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); |
2569 | 0 | output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); |
2570 | 0 | output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); |
2571 | 0 | output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); |
2572 | 0 | output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); |
2573 | 0 | output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); |
2574 | 0 | output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); |
2575 | 0 | output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); |
2576 | 0 | output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); |
2577 | 0 | output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); |
2578 | 0 | output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); |
2579 | 0 | output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); |
2580 | 0 | output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); |
2581 | 0 | output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); |
2582 | 0 | output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); |
2583 | 0 | output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); |
2584 | 0 | output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); |
2585 | 0 | output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); |
2586 | 0 | output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); |
2587 | 0 | output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); |
2588 | 0 | output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); |
2589 | 0 | output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); |
2590 | 0 | output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); |
2591 | 0 | output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); |
2592 | 0 | output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); |
2593 | 0 | output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); |
2594 | 0 | output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); |
2595 | 0 | output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); |
2596 | 0 | output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); |
2597 | 0 | } |
2598 | | |
2599 | | void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint16_t *dest, |
2600 | 0 | int stride, int bd) { |
2601 | 0 | int i, j; |
2602 | 0 | tran_low_t out[32 * 32]; |
2603 | 0 | tran_low_t *outptr = out; |
2604 | 0 | tran_low_t temp_in[32], temp_out[32]; |
2605 | | |
2606 | | // Rows |
2607 | 0 | for (i = 0; i < 32; ++i) { |
2608 | 0 | tran_low_t zero_coeff = 0; |
2609 | 0 | for (j = 0; j < 32; ++j) zero_coeff |= input[j]; |
2610 | |
|
2611 | 0 | if (zero_coeff) |
2612 | 0 | highbd_idct32_c(input, outptr, bd); |
2613 | 0 | else |
2614 | 0 | memset(outptr, 0, sizeof(tran_low_t) * 32); |
2615 | 0 | input += 32; |
2616 | 0 | outptr += 32; |
2617 | 0 | } |
2618 | | |
2619 | | // Columns |
2620 | 0 | for (i = 0; i < 32; ++i) { |
2621 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
2622 | 0 | highbd_idct32_c(temp_in, temp_out, bd); |
2623 | 0 | for (j = 0; j < 32; ++j) { |
2624 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
2625 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2626 | 0 | } |
2627 | 0 | } |
2628 | 0 | } |
2629 | | |
2630 | | void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint16_t *dest, |
2631 | 0 | int stride, int bd) { |
2632 | 0 | int i, j; |
2633 | 0 | tran_low_t out[32 * 32] = { 0 }; |
2634 | 0 | tran_low_t *outptr = out; |
2635 | 0 | tran_low_t temp_in[32], temp_out[32]; |
2636 | | |
2637 | | // Rows |
2638 | | // Only upper-left 16x16 has non-zero coeff |
2639 | 0 | for (i = 0; i < 16; ++i) { |
2640 | 0 | highbd_idct32_c(input, outptr, bd); |
2641 | 0 | input += 32; |
2642 | 0 | outptr += 32; |
2643 | 0 | } |
2644 | | |
2645 | | // Columns |
2646 | 0 | for (i = 0; i < 32; ++i) { |
2647 | 0 | uint16_t *destT = dest; |
2648 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
2649 | 0 | highbd_idct32_c(temp_in, temp_out, bd); |
2650 | 0 | for (j = 0; j < 32; ++j) { |
2651 | 0 | destT[i] = highbd_clip_pixel_add(destT[i], |
2652 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2653 | 0 | destT += stride; |
2654 | 0 | } |
2655 | 0 | } |
2656 | 0 | } |
2657 | | |
2658 | | void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint16_t *dest, |
2659 | 0 | int stride, int bd) { |
2660 | 0 | int i, j; |
2661 | 0 | tran_low_t out[32 * 32] = { 0 }; |
2662 | 0 | tran_low_t *outptr = out; |
2663 | 0 | tran_low_t temp_in[32], temp_out[32]; |
2664 | | |
2665 | | // Rows |
2666 | | // Only upper-left 8x8 has non-zero coeff |
2667 | 0 | for (i = 0; i < 8; ++i) { |
2668 | 0 | highbd_idct32_c(input, outptr, bd); |
2669 | 0 | input += 32; |
2670 | 0 | outptr += 32; |
2671 | 0 | } |
2672 | | |
2673 | | // Columns |
2674 | 0 | for (i = 0; i < 32; ++i) { |
2675 | 0 | for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; |
2676 | 0 | highbd_idct32_c(temp_in, temp_out, bd); |
2677 | 0 | for (j = 0; j < 32; ++j) { |
2678 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
2679 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2680 | 0 | } |
2681 | 0 | } |
2682 | 0 | } |
2683 | | |
2684 | | void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint16_t *dest, |
2685 | 0 | int stride, int bd) { |
2686 | 0 | int i, j; |
2687 | 0 | int a1; |
2688 | 0 | tran_low_t out = HIGHBD_WRAPLOW( |
2689 | 0 | dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); |
2690 | |
|
2691 | 0 | out = |
2692 | 0 | HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); |
2693 | 0 | a1 = ROUND_POWER_OF_TWO(out, 6); |
2694 | |
|
2695 | 0 | for (j = 0; j < 32; ++j) { |
2696 | 0 | for (i = 0; i < 32; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2697 | 0 | dest += stride; |
2698 | 0 | } |
2699 | 0 | } |
2700 | | |
2701 | | #endif // CONFIG_VP9_HIGHBITDEPTH |