/src/libvpx/vp9/common/vp9_idct.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <math.h> |
12 | | |
13 | | #include "./vp9_rtcd.h" |
14 | | #include "./vpx_dsp_rtcd.h" |
15 | | #include "vp9/common/vp9_blockd.h" |
16 | | #include "vp9/common/vp9_idct.h" |
17 | | #include "vpx_dsp/inv_txfm.h" |
18 | | #include "vpx_ports/mem.h" |
19 | | |
20 | | void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
21 | 0 | int tx_type) { |
22 | 0 | const transform_2d IHT_4[] = { |
23 | 0 | { idct4_c, idct4_c }, // DCT_DCT = 0 |
24 | 0 | { iadst4_c, idct4_c }, // ADST_DCT = 1 |
25 | 0 | { idct4_c, iadst4_c }, // DCT_ADST = 2 |
26 | 0 | { iadst4_c, iadst4_c } // ADST_ADST = 3 |
27 | 0 | }; |
28 | |
|
29 | 0 | int i, j; |
30 | 0 | tran_low_t out[4 * 4]; |
31 | 0 | tran_low_t *outptr = out; |
32 | 0 | tran_low_t temp_in[4], temp_out[4]; |
33 | | |
34 | | // inverse transform row vectors |
35 | 0 | for (i = 0; i < 4; ++i) { |
36 | 0 | IHT_4[tx_type].rows(input, outptr); |
37 | 0 | input += 4; |
38 | 0 | outptr += 4; |
39 | 0 | } |
40 | | |
41 | | // inverse transform column vectors |
42 | 0 | for (i = 0; i < 4; ++i) { |
43 | 0 | for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; |
44 | 0 | IHT_4[tx_type].cols(temp_in, temp_out); |
45 | 0 | for (j = 0; j < 4; ++j) { |
46 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
47 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 4)); |
48 | 0 | } |
49 | 0 | } |
50 | 0 | } |
51 | | |
52 | | static const transform_2d IHT_8[] = { |
53 | | { idct8_c, idct8_c }, // DCT_DCT = 0 |
54 | | { iadst8_c, idct8_c }, // ADST_DCT = 1 |
55 | | { idct8_c, iadst8_c }, // DCT_ADST = 2 |
56 | | { iadst8_c, iadst8_c } // ADST_ADST = 3 |
57 | | }; |
58 | | |
59 | | void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
60 | 0 | int tx_type) { |
61 | 0 | int i, j; |
62 | 0 | tran_low_t out[8 * 8]; |
63 | 0 | tran_low_t *outptr = out; |
64 | 0 | tran_low_t temp_in[8], temp_out[8]; |
65 | 0 | const transform_2d ht = IHT_8[tx_type]; |
66 | | |
67 | | // inverse transform row vectors |
68 | 0 | for (i = 0; i < 8; ++i) { |
69 | 0 | ht.rows(input, outptr); |
70 | 0 | input += 8; |
71 | 0 | outptr += 8; |
72 | 0 | } |
73 | | |
74 | | // inverse transform column vectors |
75 | 0 | for (i = 0; i < 8; ++i) { |
76 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
77 | 0 | ht.cols(temp_in, temp_out); |
78 | 0 | for (j = 0; j < 8; ++j) { |
79 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
80 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 5)); |
81 | 0 | } |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | static const transform_2d IHT_16[] = { |
86 | | { idct16_c, idct16_c }, // DCT_DCT = 0 |
87 | | { iadst16_c, idct16_c }, // ADST_DCT = 1 |
88 | | { idct16_c, iadst16_c }, // DCT_ADST = 2 |
89 | | { iadst16_c, iadst16_c } // ADST_ADST = 3 |
90 | | }; |
91 | | |
92 | | void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
93 | 0 | int tx_type) { |
94 | 0 | int i, j; |
95 | 0 | tran_low_t out[16 * 16]; |
96 | 0 | tran_low_t *outptr = out; |
97 | 0 | tran_low_t temp_in[16], temp_out[16]; |
98 | 0 | const transform_2d ht = IHT_16[tx_type]; |
99 | | |
100 | | // Rows |
101 | 0 | for (i = 0; i < 16; ++i) { |
102 | 0 | ht.rows(input, outptr); |
103 | 0 | input += 16; |
104 | 0 | outptr += 16; |
105 | 0 | } |
106 | | |
107 | | // Columns |
108 | 0 | for (i = 0; i < 16; ++i) { |
109 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
110 | 0 | ht.cols(temp_in, temp_out); |
111 | 0 | for (j = 0; j < 16; ++j) { |
112 | 0 | dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
113 | 0 | ROUND_POWER_OF_TWO(temp_out[j], 6)); |
114 | 0 | } |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | | // idct |
119 | | void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
120 | 152M | int eob) { |
121 | 152M | if (eob > 1) |
122 | 137M | vpx_idct4x4_16_add(input, dest, stride); |
123 | 14.8M | else |
124 | 14.8M | vpx_idct4x4_1_add(input, dest, stride); |
125 | 152M | } |
126 | | |
127 | | void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
128 | 46.9M | int eob) { |
129 | 46.9M | if (eob > 1) |
130 | 45.0M | vpx_iwht4x4_16_add(input, dest, stride); |
131 | 1.94M | else |
132 | 1.94M | vpx_iwht4x4_1_add(input, dest, stride); |
133 | 46.9M | } |
134 | | |
135 | | void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
136 | 25.1M | int eob) { |
137 | | // If dc is 1, then input[0] is the reconstructed value, do not need |
138 | | // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. |
139 | | |
140 | | // The calculation can be simplified if there are not many non-zero dct |
141 | | // coefficients. Use eobs to decide what to do. |
142 | 25.1M | if (eob == 1) |
143 | | // DC only DCT coefficient |
144 | 1.38M | vpx_idct8x8_1_add(input, dest, stride); |
145 | 23.7M | else if (eob <= 12) |
146 | 695k | vpx_idct8x8_12_add(input, dest, stride); |
147 | 23.0M | else |
148 | 23.0M | vpx_idct8x8_64_add(input, dest, stride); |
149 | 25.1M | } |
150 | | |
151 | | void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, |
152 | 4.46M | int eob) { |
153 | 4.46M | assert(((intptr_t)input) % 32 == 0); |
154 | | /* The calculation can be simplified if there are not many non-zero dct |
155 | | * coefficients. Use eobs to separate different cases. */ |
156 | 4.46M | if (eob == 1) /* DC only DCT coefficient. */ |
157 | 275k | vpx_idct16x16_1_add(input, dest, stride); |
158 | 4.19M | else if (eob <= 10) |
159 | 26.0k | vpx_idct16x16_10_add(input, dest, stride); |
160 | 4.16M | else if (eob <= 38) |
161 | 29.5k | vpx_idct16x16_38_add(input, dest, stride); |
162 | 4.13M | else |
163 | 4.13M | vpx_idct16x16_256_add(input, dest, stride); |
164 | 4.46M | } |
165 | | |
166 | | void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
167 | 2.22M | int eob) { |
168 | 2.22M | assert(((intptr_t)input) % 32 == 0); |
169 | 2.22M | if (eob == 1) |
170 | 145k | vpx_idct32x32_1_add(input, dest, stride); |
171 | 2.08M | else if (eob <= 34) |
172 | | // non-zero coeff only in upper-left 8x8 |
173 | 20.5k | vpx_idct32x32_34_add(input, dest, stride); |
174 | 2.06M | else if (eob <= 135) |
175 | | // non-zero coeff only in upper-left 16x16 |
176 | 22.6k | vpx_idct32x32_135_add(input, dest, stride); |
177 | 2.03M | else |
178 | 2.03M | vpx_idct32x32_1024_add(input, dest, stride); |
179 | 2.22M | } |
180 | | |
181 | | // iht |
182 | | void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
183 | 65.8M | int stride, int eob) { |
184 | 65.8M | if (tx_type == DCT_DCT) |
185 | 18.2M | vp9_idct4x4_add(input, dest, stride, eob); |
186 | 47.5M | else |
187 | 47.5M | vp9_iht4x4_16_add(input, dest, stride, tx_type); |
188 | 65.8M | } |
189 | | |
190 | | void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
191 | 48.1M | int stride, int eob) { |
192 | 48.1M | if (tx_type == DCT_DCT) { |
193 | 21.6M | vp9_idct8x8_add(input, dest, stride, eob); |
194 | 26.4M | } else { |
195 | 26.4M | vp9_iht8x8_64_add(input, dest, stride, tx_type); |
196 | 26.4M | } |
197 | 48.1M | } |
198 | | |
199 | | void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
200 | 9.93M | int stride, int eob) { |
201 | 9.93M | if (tx_type == DCT_DCT) { |
202 | 3.59M | vp9_idct16x16_add(input, dest, stride, eob); |
203 | 6.34M | } else { |
204 | 6.34M | vp9_iht16x16_256_add(input, dest, stride, tx_type); |
205 | 6.34M | } |
206 | 9.93M | } |
207 | | |
208 | | #if CONFIG_VP9_HIGHBITDEPTH |
209 | | |
210 | | void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, |
211 | 0 | int stride, int tx_type, int bd) { |
212 | 0 | const highbd_transform_2d IHT_4[] = { |
213 | 0 | { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 |
214 | 0 | { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 |
215 | 0 | { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 |
216 | 0 | { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 |
217 | 0 | }; |
218 | |
|
219 | 0 | int i, j; |
220 | 0 | tran_low_t out[4 * 4]; |
221 | 0 | tran_low_t *outptr = out; |
222 | 0 | tran_low_t temp_in[4], temp_out[4]; |
223 | | |
224 | | // Inverse transform row vectors. |
225 | 0 | for (i = 0; i < 4; ++i) { |
226 | 0 | IHT_4[tx_type].rows(input, outptr, bd); |
227 | 0 | input += 4; |
228 | 0 | outptr += 4; |
229 | 0 | } |
230 | | |
231 | | // Inverse transform column vectors. |
232 | 0 | for (i = 0; i < 4; ++i) { |
233 | 0 | for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; |
234 | 0 | IHT_4[tx_type].cols(temp_in, temp_out, bd); |
235 | 0 | for (j = 0; j < 4; ++j) { |
236 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
237 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
238 | 0 | } |
239 | 0 | } |
240 | 0 | } |
241 | | |
242 | | static const highbd_transform_2d HIGH_IHT_8[] = { |
243 | | { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 |
244 | | { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 |
245 | | { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 |
246 | | { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 |
247 | | }; |
248 | | |
249 | | void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest, |
250 | 0 | int stride, int tx_type, int bd) { |
251 | 0 | int i, j; |
252 | 0 | tran_low_t out[8 * 8]; |
253 | 0 | tran_low_t *outptr = out; |
254 | 0 | tran_low_t temp_in[8], temp_out[8]; |
255 | 0 | const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; |
256 | | |
257 | | // Inverse transform row vectors. |
258 | 0 | for (i = 0; i < 8; ++i) { |
259 | 0 | ht.rows(input, outptr, bd); |
260 | 0 | input += 8; |
261 | 0 | outptr += 8; |
262 | 0 | } |
263 | | |
264 | | // Inverse transform column vectors. |
265 | 0 | for (i = 0; i < 8; ++i) { |
266 | 0 | for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; |
267 | 0 | ht.cols(temp_in, temp_out, bd); |
268 | 0 | for (j = 0; j < 8; ++j) { |
269 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
270 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
271 | 0 | } |
272 | 0 | } |
273 | 0 | } |
274 | | |
275 | | static const highbd_transform_2d HIGH_IHT_16[] = { |
276 | | { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 |
277 | | { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 |
278 | | { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 |
279 | | { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 |
280 | | }; |
281 | | |
282 | | void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest, |
283 | 0 | int stride, int tx_type, int bd) { |
284 | 0 | int i, j; |
285 | 0 | tran_low_t out[16 * 16]; |
286 | 0 | tran_low_t *outptr = out; |
287 | 0 | tran_low_t temp_in[16], temp_out[16]; |
288 | 0 | const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; |
289 | | |
290 | | // Rows |
291 | 0 | for (i = 0; i < 16; ++i) { |
292 | 0 | ht.rows(input, outptr, bd); |
293 | 0 | input += 16; |
294 | 0 | outptr += 16; |
295 | 0 | } |
296 | | |
297 | | // Columns |
298 | 0 | for (i = 0; i < 16; ++i) { |
299 | 0 | for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; |
300 | 0 | ht.cols(temp_in, temp_out, bd); |
301 | 0 | for (j = 0; j < 16; ++j) { |
302 | 0 | dest[j * stride + i] = highbd_clip_pixel_add( |
303 | 0 | dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
304 | 0 | } |
305 | 0 | } |
306 | 0 | } |
307 | | |
308 | | // idct |
309 | | void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, |
310 | 0 | int eob, int bd) { |
311 | 0 | if (eob > 1) |
312 | 0 | vpx_highbd_idct4x4_16_add(input, dest, stride, bd); |
313 | 0 | else |
314 | 0 | vpx_highbd_idct4x4_1_add(input, dest, stride, bd); |
315 | 0 | } |
316 | | |
317 | | void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, |
318 | 0 | int eob, int bd) { |
319 | 0 | if (eob > 1) |
320 | 0 | vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); |
321 | 0 | else |
322 | 0 | vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); |
323 | 0 | } |
324 | | |
325 | | void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, |
326 | 0 | int eob, int bd) { |
327 | | // If dc is 1, then input[0] is the reconstructed value, do not need |
328 | | // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. |
329 | | |
330 | | // The calculation can be simplified if there are not many non-zero dct |
331 | | // coefficients. Use eobs to decide what to do. |
332 | | // DC only DCT coefficient |
333 | 0 | if (eob == 1) { |
334 | 0 | vpx_highbd_idct8x8_1_add(input, dest, stride, bd); |
335 | 0 | } else if (eob <= 12) { |
336 | 0 | vpx_highbd_idct8x8_12_add(input, dest, stride, bd); |
337 | 0 | } else { |
338 | 0 | vpx_highbd_idct8x8_64_add(input, dest, stride, bd); |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | | void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, |
343 | 0 | int stride, int eob, int bd) { |
344 | | // The calculation can be simplified if there are not many non-zero dct |
345 | | // coefficients. Use eobs to separate different cases. |
346 | | // DC only DCT coefficient. |
347 | 0 | if (eob == 1) { |
348 | 0 | vpx_highbd_idct16x16_1_add(input, dest, stride, bd); |
349 | 0 | } else if (eob <= 10) { |
350 | 0 | vpx_highbd_idct16x16_10_add(input, dest, stride, bd); |
351 | 0 | } else if (eob <= 38) { |
352 | 0 | vpx_highbd_idct16x16_38_add(input, dest, stride, bd); |
353 | 0 | } else { |
354 | 0 | vpx_highbd_idct16x16_256_add(input, dest, stride, bd); |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, |
359 | 0 | int stride, int eob, int bd) { |
360 | | // Non-zero coeff only in upper-left 8x8 |
361 | 0 | if (eob == 1) { |
362 | 0 | vpx_highbd_idct32x32_1_add(input, dest, stride, bd); |
363 | 0 | } else if (eob <= 34) { |
364 | 0 | vpx_highbd_idct32x32_34_add(input, dest, stride, bd); |
365 | 0 | } else if (eob <= 135) { |
366 | 0 | vpx_highbd_idct32x32_135_add(input, dest, stride, bd); |
367 | 0 | } else { |
368 | 0 | vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); |
369 | 0 | } |
370 | 0 | } |
371 | | |
372 | | // iht |
373 | | void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, |
374 | 0 | uint16_t *dest, int stride, int eob, int bd) { |
375 | 0 | if (tx_type == DCT_DCT) |
376 | 0 | vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); |
377 | 0 | else |
378 | 0 | vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); |
379 | 0 | } |
380 | | |
381 | | void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, |
382 | 0 | uint16_t *dest, int stride, int eob, int bd) { |
383 | 0 | if (tx_type == DCT_DCT) { |
384 | 0 | vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); |
385 | 0 | } else { |
386 | 0 | vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); |
387 | 0 | } |
388 | 0 | } |
389 | | |
390 | | void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
391 | 0 | uint16_t *dest, int stride, int eob, int bd) { |
392 | 0 | if (tx_type == DCT_DCT) { |
393 | 0 | vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
394 | 0 | } else { |
395 | 0 | vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
396 | 0 | } |
397 | 0 | } |
398 | | #endif // CONFIG_VP9_HIGHBITDEPTH |