/src/aom/av1/common/av1_inv_txfm2d.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include "config/aom_dsp_rtcd.h" |
13 | | #include "config/av1_rtcd.h" |
14 | | |
15 | | #include "av1/common/enums.h" |
16 | | #include "av1/common/av1_txfm.h" |
17 | | #include "av1/common/av1_inv_txfm1d.h" |
18 | | #include "av1/common/av1_inv_txfm1d_cfg.h" |
19 | | |
20 | | void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
21 | 0 | int stride, int bd) { |
22 | | /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
23 | | 0.5 shifts per pixel. */ |
24 | 0 | int i; |
25 | 0 | tran_low_t output[16]; |
26 | 0 | tran_low_t a1, b1, c1, d1, e1; |
27 | 0 | const tran_low_t *ip = input; |
28 | 0 | tran_low_t *op = output; |
29 | 0 | uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
30 | |
31 | 0 | for (i = 0; i < 4; i++) { |
32 | 0 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
33 | 0 | c1 = ip[1] >> UNIT_QUANT_SHIFT; |
34 | 0 | d1 = ip[2] >> UNIT_QUANT_SHIFT; |
35 | 0 | b1 = ip[3] >> UNIT_QUANT_SHIFT; |
36 | 0 | a1 += c1; |
37 | 0 | d1 -= b1; |
38 | 0 | e1 = (a1 - d1) >> 1; |
39 | 0 | b1 = e1 - b1; |
40 | 0 | c1 = e1 - c1; |
41 | 0 | a1 -= b1; |
42 | 0 | d1 += c1; |
43 | |
44 | 0 | op[0] = a1; |
45 | 0 | op[1] = b1; |
46 | 0 | op[2] = c1; |
47 | 0 | op[3] = d1; |
48 | 0 | ip += 4; |
49 | 0 | op += 4; |
50 | 0 | } |
51 | |
52 | 0 | ip = output; |
53 | 0 | for (i = 0; i < 4; i++) { |
54 | 0 | a1 = ip[4 * 0]; |
55 | 0 | c1 = ip[4 * 1]; |
56 | 0 | d1 = ip[4 * 2]; |
57 | 0 | b1 = ip[4 * 3]; |
58 | 0 | a1 += c1; |
59 | 0 | d1 -= b1; |
60 | 0 | e1 = (a1 - d1) >> 1; |
61 | 0 | b1 = e1 - b1; |
62 | 0 | c1 = e1 - c1; |
63 | 0 | a1 -= b1; |
64 | 0 | d1 += c1; |
65 | |
66 | 0 | range_check_value(a1, bd + 1); |
67 | 0 | range_check_value(b1, bd + 1); |
68 | 0 | range_check_value(c1, bd + 1); |
69 | 0 | range_check_value(d1, bd + 1); |
70 | |
71 | 0 | dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
72 | 0 | dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); |
73 | 0 | dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); |
74 | 0 | dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); |
75 | |
76 | 0 | ip++; |
77 | 0 | dest++; |
78 | 0 | } |
79 | 0 | } |
80 | | |
81 | | void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, |
82 | 0 | int dest_stride, int bd) { |
83 | 0 | int i; |
84 | 0 | tran_low_t a1, e1; |
85 | 0 | tran_low_t tmp[4]; |
86 | 0 | const tran_low_t *ip = in; |
87 | 0 | tran_low_t *op = tmp; |
88 | 0 | uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
89 | 0 | (void)bd; |
90 | |
91 | 0 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
92 | 0 | e1 = a1 >> 1; |
93 | 0 | a1 -= e1; |
94 | 0 | op[0] = a1; |
95 | 0 | op[1] = op[2] = op[3] = e1; |
96 | |
97 | 0 | ip = tmp; |
98 | 0 | for (i = 0; i < 4; i++) { |
99 | 0 | e1 = ip[0] >> 1; |
100 | 0 | a1 = ip[0] - e1; |
101 | 0 | dest[dest_stride * 0] = |
102 | 0 | highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd); |
103 | 0 | dest[dest_stride * 1] = |
104 | 0 | highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd); |
105 | 0 | dest[dest_stride * 2] = |
106 | 0 | highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd); |
107 | 0 | dest[dest_stride * 3] = |
108 | 0 | highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd); |
109 | 0 | ip++; |
110 | 0 | dest++; |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | 0 | static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) { |
115 | 0 | switch (txfm_type) { |
116 | 0 | case TXFM_TYPE_DCT4: return av1_idct4; |
117 | 0 | case TXFM_TYPE_DCT8: return av1_idct8; |
118 | 0 | case TXFM_TYPE_DCT16: return av1_idct16; |
119 | 0 | case TXFM_TYPE_DCT32: return av1_idct32; |
120 | 0 | case TXFM_TYPE_DCT64: return av1_idct64; |
121 | 0 | case TXFM_TYPE_ADST4: return av1_iadst4; |
122 | 0 | case TXFM_TYPE_ADST8: return av1_iadst8; |
123 | 0 | case TXFM_TYPE_ADST16: return av1_iadst16; |
124 | 0 | case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c; |
125 | 0 | case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c; |
126 | 0 | case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c; |
127 | 0 | case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c; |
128 | 0 | default: assert(0); return NULL; |
129 | 0 | } |
130 | 0 | } |
131 | | |
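 | | // The two entries in each table below are the post-row and post-column |
 | | // rounding shifts; inv_txfm2d_add_c() negates them when it calls |
 | | // av1_round_shift_array(), so the negative values are right shifts. |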
132 | | static const int8_t inv_shift_4x4[2] = { 0, -4 }; |
133 | | static const int8_t inv_shift_8x8[2] = { -1, -4 }; |
134 | | static const int8_t inv_shift_16x16[2] = { -2, -4 }; |
135 | | static const int8_t inv_shift_32x32[2] = { -2, -4 }; |
136 | | static const int8_t inv_shift_64x64[2] = { -2, -4 }; |
137 | | static const int8_t inv_shift_4x8[2] = { 0, -4 }; |
138 | | static const int8_t inv_shift_8x4[2] = { 0, -4 }; |
139 | | static const int8_t inv_shift_8x16[2] = { -1, -4 }; |
140 | | static const int8_t inv_shift_16x8[2] = { -1, -4 }; |
141 | | static const int8_t inv_shift_16x32[2] = { -1, -4 }; |
142 | | static const int8_t inv_shift_32x16[2] = { -1, -4 }; |
143 | | static const int8_t inv_shift_32x64[2] = { -1, -4 }; |
144 | | static const int8_t inv_shift_64x32[2] = { -1, -4 }; |
145 | | static const int8_t inv_shift_4x16[2] = { -1, -4 }; |
146 | | static const int8_t inv_shift_16x4[2] = { -1, -4 }; |
147 | | static const int8_t inv_shift_8x32[2] = { -2, -4 }; |
148 | | static const int8_t inv_shift_32x8[2] = { -2, -4 }; |
149 | | static const int8_t inv_shift_16x64[2] = { -2, -4 }; |
150 | | static const int8_t inv_shift_64x16[2] = { -2, -4 }; |
151 | | |
152 | | const int8_t *av1_inv_txfm_shift_ls[TX_SIZES_ALL] = { |
153 | | inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32, |
154 | | inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16, |
155 | | inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64, |
156 | | inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32, |
157 | | inv_shift_32x8, inv_shift_16x64, inv_shift_64x16, |
158 | | }; |
159 | | |
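 | | // Cosine-constant bit precision for the row/column 1D stages, indexed by |
 | | // [txw_idx][txh_idx]. Entries of 0 correspond to width/height pairs that |
 | | // are not valid AV1 transform sizes (aspect ratio beyond 4:1) and are |
 | | // never looked up. |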
160 | | /* clang-format off */ |
161 | | const int8_t av1_inv_cos_bit_col[MAX_TXWH_IDX] // txw_idx |
162 | | [MAX_TXWH_IDX] = { // txh_idx |
163 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 }, |
164 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 }, |
165 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, |
166 | | { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, |
167 | | { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT } |
168 | | }; |
169 | | |
170 | | const int8_t av1_inv_cos_bit_row[MAX_TXWH_IDX] // txw_idx |
171 | | [MAX_TXWH_IDX] = { // txh_idx |
172 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 }, |
173 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 }, |
174 | | { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, |
175 | | { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, |
176 | | { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT } |
177 | | }; |
178 | | /* clang-format on */ |
179 | | |
180 | | static const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 }; |
181 | | |
182 | | void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, |
183 | 0 | TXFM_2D_FLIP_CFG *cfg) { |
184 | 0 | assert(cfg != NULL); |
185 | 0 | cfg->tx_size = tx_size; |
186 | 0 | av1_zero(cfg->stage_range_col); |
187 | 0 | av1_zero(cfg->stage_range_row); |
188 | 0 | set_flip_cfg(tx_type, cfg); |
189 | 0 | const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; |
190 | 0 | const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; |
191 | 0 | cfg->shift = av1_inv_txfm_shift_ls[tx_size]; |
192 | 0 | const int txw_idx = get_txw_idx(tx_size); |
193 | 0 | const int txh_idx = get_txh_idx(tx_size); |
194 | 0 | cfg->cos_bit_col = av1_inv_cos_bit_col[txw_idx][txh_idx]; |
195 | 0 | cfg->cos_bit_row = av1_inv_cos_bit_row[txw_idx][txh_idx]; |
196 | 0 | cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col]; |
197 | 0 | if (cfg->txfm_type_col == TXFM_TYPE_ADST4) { |
198 | 0 | memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range)); |
199 | 0 | } |
200 | 0 | cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row]; |
201 | 0 | if (cfg->txfm_type_row == TXFM_TYPE_ADST4) { |
202 | 0 | memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range)); |
203 | 0 | } |
204 | 0 | cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col]; |
205 | 0 | cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row]; |
206 | 0 | } |
207 | | |
208 | | void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, |
209 | | const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size, |
210 | 0 | int bd) { |
211 | 0 | const int fwd_shift = inv_start_range[tx_size]; |
212 | 0 | const int8_t *shift = cfg->shift; |
213 | 0 | int8_t opt_range_row, opt_range_col; |
214 | 0 | if (bd == 8) { |
215 | 0 | opt_range_row = 16; |
216 | 0 | opt_range_col = 16; |
217 | 0 | } else if (bd == 10) { |
218 | 0 | opt_range_row = 18; |
219 | 0 | opt_range_col = 16; |
220 | 0 | } else { |
221 | 0 | assert(bd == 12); |
222 | 0 | opt_range_row = 20; |
223 | 0 | opt_range_col = 18; |
224 | 0 | } |
225 | | // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning |
226 | 0 | for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) { |
227 | 0 | int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1; |
228 | 0 | (void)real_range_row; |
229 | 0 | if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) { |
230 | | // the adst4 may use 1 extra bit on top of opt_range_row at stage 1 |
231 | | // so opt_range_row >= real_range_row will not hold |
232 | 0 | stage_range_row[i] = opt_range_row; |
233 | 0 | } else { |
234 | 0 | assert(opt_range_row >= real_range_row); |
235 | 0 | stage_range_row[i] = opt_range_row; |
236 | 0 | } |
237 | 0 | } |
238 | | // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning |
239 | 0 | for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) { |
240 | 0 | int real_range_col = |
241 | 0 | cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1; |
242 | 0 | (void)real_range_col; |
243 | 0 | if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) { |
244 | | // the adst4 may use 1 extra bit on top of opt_range_col at stage 1 |
245 | | // so opt_range_col >= real_range_col will not hold |
246 | 0 | stage_range_col[i] = opt_range_col; |
247 | 0 | } else { |
248 | 0 | assert(opt_range_col >= real_range_col); |
249 | 0 | stage_range_col[i] = opt_range_col; |
250 | 0 | } |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output, |
255 | | int stride, TXFM_2D_FLIP_CFG *cfg, |
256 | | int32_t *txfm_buf, TX_SIZE tx_size, |
257 | 0 | int bd) { |
258 | | // Note when assigning txfm_size_col, we use the txfm_size from the |
259 | | // row configuration and vice versa. This is intentionally done to |
260 | | // accurately perform rectangular transforms. When the transform is |
261 | | // rectangular, the number of columns will be the same as the |
262 | | // txfm_size stored in the row cfg struct. It will make no difference |
263 | | // for square transforms. |
264 | 0 | const int txfm_size_col = tx_size_wide[cfg->tx_size]; |
265 | 0 | const int txfm_size_row = tx_size_high[cfg->tx_size]; |
266 | | // Take the shift from the larger dimension in the rectangular case. |
267 | 0 | const int8_t *shift = cfg->shift; |
268 | 0 | const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); |
269 | 0 | int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; |
270 | 0 | int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; |
271 | 0 | assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM); |
272 | 0 | assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM); |
273 | 0 | av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd); |
274 | |
275 | 0 | const int8_t cos_bit_col = cfg->cos_bit_col; |
276 | 0 | const int8_t cos_bit_row = cfg->cos_bit_row; |
277 | 0 | const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col); |
278 | 0 | const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row); |
279 | | |
280 | | // txfm_buf's length is txfm_size_row * txfm_size_col + 2 * |
281 | | // AOMMAX(txfm_size_row, txfm_size_col) |
282 | | // it is used for intermediate data buffering |
283 | 0 | const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); |
284 | 0 | int32_t *temp_in = txfm_buf; |
285 | 0 | int32_t *temp_out = temp_in + buf_offset; |
286 | 0 | int32_t *buf = temp_out + buf_offset; |
287 | 0 | int32_t *buf_ptr = buf; |
288 | 0 | int c, r; |
289 | | |
290 | | // Rows |
291 | 0 | for (r = 0; r < txfm_size_row; ++r) { |
292 | 0 | if (abs(rect_type) == 1) { |
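 | | // For 2:1 / 1:2 rectangular sizes, pre-scale the input by 1/sqrt(2) |
 | | // (NewInvSqrt2 with NewSqrt2Bits of fractional precision) so the |
 | | // rectangular transform keeps approximately unit norm. |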
293 | 0 | for (c = 0; c < txfm_size_col; ++c) { |
294 | 0 | temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits); |
295 | 0 | } |
296 | 0 | clamp_buf(temp_in, txfm_size_col, bd + 8); |
297 | 0 | txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row); |
298 | 0 | } else { |
299 | 0 | for (c = 0; c < txfm_size_col; ++c) { |
300 | 0 | temp_in[c] = input[c]; |
301 | 0 | } |
302 | 0 | clamp_buf(temp_in, txfm_size_col, bd + 8); |
303 | 0 | txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row); |
304 | 0 | } |
305 | 0 | av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); |
306 | 0 | input += txfm_size_col; |
307 | 0 | buf_ptr += txfm_size_col; |
308 | 0 | } |
309 | | |
310 | | // Columns |
311 | 0 | for (c = 0; c < txfm_size_col; ++c) { |
312 | 0 | if (cfg->lr_flip == 0) { |
313 | 0 | for (r = 0; r < txfm_size_row; ++r) |
314 | 0 | temp_in[r] = buf[r * txfm_size_col + c]; |
315 | 0 | } else { |
316 | | // flip left right |
317 | 0 | for (r = 0; r < txfm_size_row; ++r) |
318 | 0 | temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; |
319 | 0 | } |
320 | 0 | clamp_buf(temp_in, txfm_size_row, AOMMAX(bd + 6, 16)); |
321 | 0 | txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col); |
322 | 0 | av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); |
323 | 0 | if (cfg->ud_flip == 0) { |
324 | 0 | for (r = 0; r < txfm_size_row; ++r) { |
325 | 0 | output[r * stride + c] = |
326 | 0 | highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); |
327 | 0 | } |
328 | 0 | } else { |
329 | | // flip upside down |
330 | 0 | for (r = 0; r < txfm_size_row; ++r) { |
331 | 0 | output[r * stride + c] = highbd_clip_pixel_add( |
332 | 0 | output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); |
333 | 0 | } |
334 | 0 | } |
335 | 0 | } |
336 | 0 | } |
337 | | |
338 | | static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output, |
339 | | int stride, int32_t *txfm_buf, |
340 | | TX_TYPE tx_type, TX_SIZE tx_size, |
341 | 0 | int bd) { |
342 | 0 | TXFM_2D_FLIP_CFG cfg; |
343 | 0 | av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg); |
344 | | // Forward shift sum uses larger square size, to be consistent with what |
345 | | // av1_gen_inv_stage_range() does for inverse shifts. |
346 | 0 | inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd); |
347 | 0 | } |
348 | | |
349 | | void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, |
350 | 0 | int stride, TX_TYPE tx_type, int bd) { |
351 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]); |
352 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd); |
353 | 0 | } |
354 | | |
355 | | void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, |
356 | 0 | int stride, TX_TYPE tx_type, int bd) { |
357 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]); |
358 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd); |
359 | 0 | } |
360 | | |
361 | | void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, |
362 | 0 | int stride, TX_TYPE tx_type, int bd) { |
363 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]); |
364 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd); |
365 | 0 | } |
366 | | |
367 | | void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, |
368 | 0 | int stride, TX_TYPE tx_type, int bd) { |
369 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]); |
370 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd); |
371 | 0 | } |
372 | | |
373 | | void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, |
374 | 0 | int stride, TX_TYPE tx_type, int bd) { |
375 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]); |
376 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd); |
377 | 0 | } |
378 | | |
379 | | void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, |
380 | 0 | int stride, TX_TYPE tx_type, int bd) { |
381 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]); |
382 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd); |
383 | 0 | } |
384 | | |
385 | | void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, |
386 | 0 | int stride, TX_TYPE tx_type, int bd) { |
387 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]); |
388 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd); |
389 | 0 | } |
390 | | |
391 | | void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, |
392 | 0 | int stride, TX_TYPE tx_type, int bd) { |
393 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]); |
394 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd); |
395 | 0 | } |
396 | | |
397 | | void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, |
398 | 0 | int stride, TX_TYPE tx_type, int bd) { |
399 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]); |
400 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd); |
401 | 0 | } |
402 | | |
403 | | void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, |
404 | 0 | int stride, TX_TYPE tx_type, int bd) { |
405 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]); |
406 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd); |
407 | 0 | } |
408 | | |
409 | | void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, |
410 | 0 | int stride, TX_TYPE tx_type, int bd) { |
411 | | // TODO(urvang): Can the same array be reused, instead of using a new array? |
412 | | // Remap 32x32 input into a modified 64x64 by: |
413 | | // - Copying over these values in top-left 32x32 locations. |
414 | | // - Setting the rest of the locations to 0. |
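 | | // (AV1 codes at most the top-left 32x32 coefficients for 64-point |
 | | // transforms; the remaining positions are always zero.) |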
415 | 0 | int32_t mod_input[64 * 64]; |
416 | 0 | for (int row = 0; row < 32; ++row) { |
417 | 0 | memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); |
418 | 0 | memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); |
419 | 0 | } |
420 | 0 | memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input)); |
421 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]); |
422 | 0 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64, |
423 | 0 | bd); |
424 | 0 | } |
425 | | |
426 | | void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output, |
427 | 0 | int stride, TX_TYPE tx_type, int bd) { |
428 | | // Remap 32x32 input into a modified 64x32 by: |
429 | | // - Copying over these values in top-left 32x32 locations. |
430 | | // - Setting the rest of the locations to 0. |
431 | 0 | int32_t mod_input[64 * 32]; |
432 | 0 | for (int row = 0; row < 32; ++row) { |
433 | 0 | memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); |
434 | 0 | memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); |
435 | 0 | } |
436 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]); |
437 | 0 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32, |
438 | 0 | bd); |
439 | 0 | } |
440 | | |
441 | | void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output, |
442 | 0 | int stride, TX_TYPE tx_type, int bd) { |
443 | | // Remap 32x32 input into a modified 32x64 input by: |
444 | | // - Copying over these values in top-left 32x32 locations. |
445 | | // - Setting the rest of the locations to 0. |
446 | 0 | int32_t mod_input[32 * 64]; |
447 | 0 | memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input)); |
448 | 0 | memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input)); |
449 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]); |
450 | 0 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64, |
451 | 0 | bd); |
452 | 0 | } |
453 | | |
454 | | void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output, |
455 | 0 | int stride, TX_TYPE tx_type, int bd) { |
456 | | // Remap 16x32 input into a modified 16x64 input by: |
457 | | // - Copying over these values in top-left 16x32 locations. |
458 | | // - Setting the rest of the locations to 0. |
459 | 0 | int32_t mod_input[16 * 64]; |
460 | 0 | memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input)); |
461 | 0 | memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input)); |
462 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); |
463 | 0 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64, |
464 | 0 | bd); |
465 | 0 | } |
466 | | |
467 | | void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output, |
468 | 0 | int stride, TX_TYPE tx_type, int bd) { |
469 | | // Remap 32x16 input into a modified 64x16 by: |
470 | | // - Copying over these values in top-left 32x16 locations. |
471 | | // - Setting the rest of the locations to 0. |
472 | 0 | int32_t mod_input[64 * 16]; |
473 | 0 | for (int row = 0; row < 16; ++row) { |
474 | 0 | memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); |
475 | 0 | memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); |
476 | 0 | } |
477 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); |
478 | 0 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16, |
479 | 0 | bd); |
480 | 0 | } |
481 | | |
482 | | void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, |
483 | 0 | int stride, TX_TYPE tx_type, int bd) { |
484 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); |
485 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd); |
486 | 0 | } |
487 | | |
488 | | void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output, |
489 | 0 | int stride, TX_TYPE tx_type, int bd) { |
490 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); |
491 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd); |
492 | 0 | } |
493 | | |
494 | | void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output, |
495 | 0 | int stride, TX_TYPE tx_type, int bd) { |
496 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); |
497 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd); |
498 | 0 | } |
499 | | |
500 | | void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output, |
501 | 0 | int stride, TX_TYPE tx_type, int bd) { |
502 | 0 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); |
503 | 0 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd); |
504 | 0 | } |