/src/aom/av1/encoder/hybrid_fwd_txfm.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include "config/aom_config.h" |
13 | | #include "config/av1_rtcd.h" |
14 | | #include "config/aom_dsp_rtcd.h" |
15 | | |
16 | | #include "av1/common/idct.h" |
17 | | #include "av1/common/blockd.h" |
18 | | #include "av1/encoder/hybrid_fwd_txfm.h" |
19 | | |
20 | | /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per |
21 | | pixel. */ |
22 | 40.7M | void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { |
23 | 40.7M | int i; |
24 | 40.7M | tran_high_t a1, b1, c1, d1, e1; |
25 | 40.7M | const int16_t *ip_pass0 = input; |
26 | 40.7M | const tran_low_t *ip = NULL; |
27 | 40.7M | tran_low_t *op = output; |
28 | | |
29 | 203M | for (i = 0; i < 4; i++) { |
30 | 162M | a1 = ip_pass0[0 * stride]; |
31 | 162M | b1 = ip_pass0[1 * stride]; |
32 | 162M | c1 = ip_pass0[2 * stride]; |
33 | 162M | d1 = ip_pass0[3 * stride]; |
34 | | |
35 | 162M | a1 += b1; |
36 | 162M | d1 = d1 - c1; |
37 | 162M | e1 = (a1 - d1) >> 1; |
38 | 162M | b1 = e1 - b1; |
39 | 162M | c1 = e1 - c1; |
40 | 162M | a1 -= c1; |
41 | 162M | d1 += b1; |
42 | 162M | op[0] = (tran_low_t)a1; |
43 | 162M | op[4] = (tran_low_t)c1; |
44 | 162M | op[8] = (tran_low_t)d1; |
45 | 162M | op[12] = (tran_low_t)b1; |
46 | | |
47 | 162M | ip_pass0++; |
48 | 162M | op++; |
49 | 162M | } |
50 | 40.7M | ip = output; |
51 | 40.7M | op = output; |
52 | | |
53 | 203M | for (i = 0; i < 4; i++) { |
54 | 162M | a1 = ip[0]; |
55 | 162M | b1 = ip[1]; |
56 | 162M | c1 = ip[2]; |
57 | 162M | d1 = ip[3]; |
58 | | |
59 | 162M | a1 += b1; |
60 | 162M | d1 -= c1; |
61 | 162M | e1 = (a1 - d1) >> 1; |
62 | 162M | b1 = e1 - b1; |
63 | 162M | c1 = e1 - c1; |
64 | 162M | a1 -= c1; |
65 | 162M | d1 += b1; |
66 | 162M | op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); |
67 | 162M | op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); |
68 | 162M | op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); |
69 | 162M | op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); |
70 | | |
71 | 162M | ip += 4; |
72 | 162M | op += 4; |
73 | 162M | } |
74 | 40.7M | } |
75 | | |
76 | | void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, |
77 | 40.7M | int stride) { |
78 | 40.7M | av1_fwht4x4_c(input, output, stride); |
79 | 40.7M | } |
80 | | |
81 | | static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, |
82 | 41.8M | int diff_stride, TxfmParam *txfm_param) { |
83 | 41.8M | int32_t *dst_coeff = (int32_t *)coeff; |
84 | 41.8M | const TX_TYPE tx_type = txfm_param->tx_type; |
85 | 41.8M | const int bd = txfm_param->bd; |
86 | 41.8M | if (txfm_param->lossless) { |
87 | 40.7M | assert(tx_type == DCT_DCT); |
88 | 40.7M | av1_highbd_fwht4x4(src_diff, coeff, diff_stride); |
89 | 40.7M | return; |
90 | 40.7M | } |
91 | 1.13M | av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); |
92 | 1.13M | } |
93 | | |
94 | | static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, |
95 | 119k | int diff_stride, TxfmParam *txfm_param) { |
96 | 119k | int32_t *dst_coeff = (int32_t *)coeff; |
97 | 119k | av1_fwd_txfm2d_4x8(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
98 | 119k | txfm_param->bd); |
99 | 119k | } |
100 | | |
101 | | static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, |
102 | 141k | int diff_stride, TxfmParam *txfm_param) { |
103 | 141k | int32_t *dst_coeff = (int32_t *)coeff; |
104 | 141k | av1_fwd_txfm2d_8x4(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
105 | 141k | txfm_param->bd); |
106 | 141k | } |
107 | | |
108 | | static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, |
109 | 307k | int diff_stride, TxfmParam *txfm_param) { |
110 | 307k | int32_t *dst_coeff = (int32_t *)coeff; |
111 | 307k | const TX_TYPE tx_type = txfm_param->tx_type; |
112 | 307k | const int bd = txfm_param->bd; |
113 | 307k | av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd); |
114 | 307k | } |
115 | | |
116 | | static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff, |
117 | 311k | int diff_stride, TxfmParam *txfm_param) { |
118 | 311k | int32_t *dst_coeff = (int32_t *)coeff; |
119 | 311k | const TX_TYPE tx_type = txfm_param->tx_type; |
120 | 311k | const int bd = txfm_param->bd; |
121 | 311k | av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd); |
122 | 311k | } |
123 | | |
124 | | static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff, |
125 | 192k | int diff_stride, TxfmParam *txfm_param) { |
126 | 192k | int32_t *dst_coeff = (int32_t *)coeff; |
127 | 192k | av1_fwd_txfm2d_16x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
128 | 192k | txfm_param->bd); |
129 | 192k | } |
130 | | |
131 | | static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff, |
132 | 195k | int diff_stride, TxfmParam *txfm_param) { |
133 | 195k | int32_t *dst_coeff = (int32_t *)coeff; |
134 | 195k | av1_fwd_txfm2d_32x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
135 | 195k | txfm_param->bd); |
136 | 195k | } |
137 | | |
138 | | #if !CONFIG_REALTIME_ONLY |
139 | | static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff, |
140 | 0 | int diff_stride, TxfmParam *txfm_param) { |
141 | 0 | int32_t *dst_coeff = (int32_t *)coeff; |
142 | 0 | av1_fwd_txfm2d_16x4(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
143 | 0 | txfm_param->bd); |
144 | 0 | } |
145 | | |
146 | | static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff, |
147 | 798 | int diff_stride, TxfmParam *txfm_param) { |
148 | 798 | int32_t *dst_coeff = (int32_t *)coeff; |
149 | 798 | av1_fwd_txfm2d_4x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
150 | 798 | txfm_param->bd); |
151 | 798 | } |
152 | | |
153 | | static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff, |
154 | 0 | int diff_stride, TxfmParam *txfm_param) { |
155 | 0 | int32_t *dst_coeff = (int32_t *)coeff; |
156 | 0 | av1_fwd_txfm2d_32x8(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
157 | 0 | txfm_param->bd); |
158 | 0 | } |
159 | | |
160 | | static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, |
161 | 2.43k | int diff_stride, TxfmParam *txfm_param) { |
162 | 2.43k | int32_t *dst_coeff = (int32_t *)coeff; |
163 | 2.43k | av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
164 | 2.43k | txfm_param->bd); |
165 | 2.43k | } |
166 | | #endif |
167 | | |
168 | | static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, |
169 | 1.49M | int diff_stride, TxfmParam *txfm_param) { |
170 | 1.49M | int32_t *dst_coeff = (int32_t *)coeff; |
171 | 1.49M | const TX_TYPE tx_type = txfm_param->tx_type; |
172 | 1.49M | const int bd = txfm_param->bd; |
173 | 1.49M | av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); |
174 | 1.49M | } |
175 | | |
176 | | static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, |
177 | 699k | int diff_stride, TxfmParam *txfm_param) { |
178 | 699k | int32_t *dst_coeff = (int32_t *)coeff; |
179 | 699k | const TX_TYPE tx_type = txfm_param->tx_type; |
180 | 699k | const int bd = txfm_param->bd; |
181 | 699k | av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); |
182 | 699k | } |
183 | | |
184 | | static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, |
185 | 296k | int diff_stride, TxfmParam *txfm_param) { |
186 | 296k | int32_t *dst_coeff = (int32_t *)coeff; |
187 | 296k | const TX_TYPE tx_type = txfm_param->tx_type; |
188 | 296k | const int bd = txfm_param->bd; |
189 | 296k | av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); |
190 | 296k | } |
191 | | |
192 | | static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff, |
193 | 71.9k | int diff_stride, TxfmParam *txfm_param) { |
194 | 71.9k | assert(txfm_param->tx_type == DCT_DCT); |
195 | 71.9k | int32_t *dst_coeff = (int32_t *)coeff; |
196 | 71.9k | const int bd = txfm_param->bd; |
197 | 71.9k | av1_fwd_txfm2d_32x64(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
198 | 71.9k | bd); |
199 | 71.9k | } |
200 | | |
201 | | static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff, |
202 | 69.5k | int diff_stride, TxfmParam *txfm_param) { |
203 | 69.5k | assert(txfm_param->tx_type == DCT_DCT); |
204 | 69.5k | int32_t *dst_coeff = (int32_t *)coeff; |
205 | 69.5k | const int bd = txfm_param->bd; |
206 | 69.5k | av1_fwd_txfm2d_64x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, |
207 | 69.5k | bd); |
208 | 69.5k | } |
209 | | |
210 | | #if !CONFIG_REALTIME_ONLY |
211 | | static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff, |
212 | 0 | int diff_stride, TxfmParam *txfm_param) { |
213 | 0 | assert(txfm_param->tx_type == DCT_DCT); |
214 | 0 | int32_t *dst_coeff = (int32_t *)coeff; |
215 | 0 | const int bd = txfm_param->bd; |
216 | 0 | av1_fwd_txfm2d_16x64(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); |
217 | 0 | } |
218 | | |
219 | | static void highbd_fwd_txfm_64x16(const int16_t *src_diff, tran_low_t *coeff, |
220 | 0 | int diff_stride, TxfmParam *txfm_param) { |
221 | 0 | assert(txfm_param->tx_type == DCT_DCT); |
222 | 0 | int32_t *dst_coeff = (int32_t *)coeff; |
223 | 0 | const int bd = txfm_param->bd; |
224 | 0 | av1_fwd_txfm2d_64x16(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); |
225 | 0 | } |
226 | | #endif |
227 | | |
228 | | static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, |
229 | 222k | int diff_stride, TxfmParam *txfm_param) { |
230 | 222k | assert(txfm_param->tx_type == DCT_DCT); |
231 | 222k | int32_t *dst_coeff = (int32_t *)coeff; |
232 | 222k | const int bd = txfm_param->bd; |
233 | 222k | av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); |
234 | 222k | } |
235 | | |
236 | | void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, |
237 | 45.9M | TxfmParam *txfm_param) { |
238 | 45.9M | if (txfm_param->bd == 8) |
239 | 45.9M | av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); |
240 | 18.4E | else |
241 | 18.4E | av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); |
242 | 45.9M | } |
243 | | |
244 | | void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff, |
245 | 45.9M | int diff_stride, TxfmParam *txfm_param) { |
246 | 45.9M | av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); |
247 | 45.9M | } |
248 | | |
249 | | void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, |
250 | 45.9M | int diff_stride, TxfmParam *txfm_param) { |
251 | 45.9M | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); |
252 | 45.9M | const TX_SIZE tx_size = txfm_param->tx_size; |
253 | 45.9M | switch (tx_size) { |
254 | 222k | case TX_64X64: |
255 | 222k | highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param); |
256 | 222k | break; |
257 | 71.9k | case TX_32X64: |
258 | 71.9k | highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param); |
259 | 71.9k | break; |
260 | 69.5k | case TX_64X32: |
261 | 69.5k | highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param); |
262 | 69.5k | break; |
263 | | |
264 | 296k | case TX_32X32: |
265 | 296k | highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param); |
266 | 296k | break; |
267 | 699k | case TX_16X16: |
268 | 699k | highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param); |
269 | 699k | break; |
270 | 1.49M | case TX_8X8: |
271 | 1.49M | highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param); |
272 | 1.49M | break; |
273 | 119k | case TX_4X8: |
274 | 119k | highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param); |
275 | 119k | break; |
276 | 141k | case TX_8X4: |
277 | 141k | highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param); |
278 | 141k | break; |
279 | 307k | case TX_8X16: |
280 | 307k | highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param); |
281 | 307k | break; |
282 | 311k | case TX_16X8: |
283 | 311k | highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param); |
284 | 311k | break; |
285 | 192k | case TX_16X32: |
286 | 192k | highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param); |
287 | 192k | break; |
288 | 195k | case TX_32X16: |
289 | 195k | highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param); |
290 | 195k | break; |
291 | 41.8M | case TX_4X4: |
292 | 41.8M | highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param); |
293 | 41.8M | break; |
294 | 0 | #if !CONFIG_REALTIME_ONLY |
295 | 798 | case TX_4X16: |
296 | 798 | highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param); |
297 | 798 | break; |
298 | 0 | case TX_16X4: |
299 | 0 | highbd_fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param); |
300 | 0 | break; |
301 | 2.43k | case TX_8X32: |
302 | 2.43k | highbd_fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param); |
303 | 2.43k | break; |
304 | 0 | case TX_32X8: |
305 | 0 | highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param); |
306 | 0 | break; |
307 | 0 | case TX_16X64: |
308 | 0 | highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param); |
309 | 0 | break; |
310 | 0 | case TX_64X16: |
311 | 0 | highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param); |
312 | 0 | break; |
313 | 0 | #endif |
314 | 0 | default: assert(0); break; |
315 | 45.9M | } |
316 | 45.9M | } |
317 | | |
318 | | void av1_quick_txfm(int use_hadamard, TX_SIZE tx_size, BitDepthInfo bd_info, |
319 | | const int16_t *src_diff, int src_stride, |
320 | 6.82M | tran_low_t *coeff) { |
321 | 6.82M | if (use_hadamard) { |
322 | 6.59M | switch (tx_size) { |
323 | 388k | case TX_4X4: aom_hadamard_4x4(src_diff, src_stride, coeff); break; |
324 | 2.64M | case TX_8X8: aom_hadamard_8x8(src_diff, src_stride, coeff); break; |
325 | 1.57M | case TX_16X16: aom_hadamard_16x16(src_diff, src_stride, coeff); break; |
326 | 1.99M | case TX_32X32: aom_hadamard_32x32(src_diff, src_stride, coeff); break; |
327 | 0 | default: assert(0); |
328 | 6.59M | } |
329 | 6.59M | } else { |
330 | 230k | TxfmParam txfm_param; |
331 | 230k | txfm_param.tx_type = DCT_DCT; |
332 | 230k | txfm_param.tx_size = tx_size; |
333 | 230k | txfm_param.lossless = 0; |
334 | 230k | txfm_param.bd = bd_info.bit_depth; |
335 | 230k | txfm_param.is_hbd = bd_info.use_highbitdepth_buf; |
336 | 230k | txfm_param.tx_set_type = EXT_TX_SET_ALL16; |
337 | 230k | av1_fwd_txfm(src_diff, coeff, src_stride, &txfm_param); |
338 | 230k | } |
339 | 6.82M | } |