/src/aom/av1/common/av1_inv_txfm1d.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | #include "av1/common/av1_inv_txfm1d.h" |
14 | | #include "av1/common/av1_txfm.h" |
15 | | /* av1_idct4: 4-point 1-D inverse-transform kernel (inverse DCT, going by the name). Reads input[0..3] and leaves the result in output[0..3], using the local step[4] as the intermediate buffer. cospi is the table returned by cospi_arr(cos_bit); stage_range[stage] is handed to av1_range_check_buf and clamp_value. input and output must be different buffers (asserted). NOTE(review): half_btf and clamp_value are defined elsewhere - presumably a cos_bit-rounded weighted sum and a bit-width clamp; confirm in av1_txfm.h. */ |
16 | | void av1_idct4(const int32_t *input, int32_t *output, int8_t cos_bit, |
17 | 3.01M | const int8_t *stage_range) { |
18 | 3.01M | assert(output != input); |
19 | 3.01M | const int32_t size = 4; |
20 | 3.01M | const int32_t *cospi = cospi_arr(cos_bit); |
21 | | |
22 | 3.01M | int32_t stage = 0; |
23 | 3.01M | int32_t *bf0, *bf1; |
24 | 3.01M | int32_t step[4]; |
25 | | |
26 | | // stage 0: no work here; input is consumed directly by stage 1 |
27 | | |
28 | | // stage 1: copy input into output in the order 0, 2, 1, 3, then range-check output |
29 | 3.01M | stage++; |
30 | 3.01M | bf1 = output; |
31 | 3.01M | bf1[0] = input[0]; |
32 | 3.01M | bf1[1] = input[2]; |
33 | 3.01M | bf1[2] = input[1]; |
34 | 3.01M | bf1[3] = input[3]; |
35 | 3.01M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
36 | | |
37 | | // stage 2: output -> step; half_btf on pair (0,1) with cospi[32] and on pair (2,3) with cospi[48]/cospi[16], then range-check step |
38 | 3.01M | stage++; |
39 | 3.01M | bf0 = output; |
40 | 3.01M | bf1 = step; |
41 | 3.01M | bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); |
42 | 3.01M | bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); |
43 | 3.01M | bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); |
44 | 3.01M | bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); |
45 | 3.01M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
46 | | |
47 | | // stage 3: step -> output; clamped sums step[0]+step[3], step[1]+step[2] and differences step[1]-step[2], step[0]-step[3]; no range check follows this last stage |
48 | 3.01M | stage++; |
49 | 3.01M | bf0 = step; |
50 | 3.01M | bf1 = output; |
51 | 3.01M | bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); |
52 | 3.01M | bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); |
53 | 3.01M | bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); |
54 | 3.01M | bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); |
55 | 3.01M | } |
56 | | /* av1_idct8: 8-point 1-D inverse-transform kernel (inverse DCT, going by the name). Five stages alternate between output and the local step[8] buffer; the last stage writes the result to output[0..7]. cospi is the table returned by cospi_arr(cos_bit); stage_range[stage] is handed to av1_range_check_buf and clamp_value. input and output must be different buffers (asserted). NOTE(review): half_btf and clamp_value are defined elsewhere - presumably a cos_bit-rounded weighted sum and a bit-width clamp; confirm in av1_txfm.h. */ |
57 | | void av1_idct8(const int32_t *input, int32_t *output, int8_t cos_bit, |
58 | 8.28M | const int8_t *stage_range) { |
59 | 8.28M | assert(output != input); |
60 | 8.28M | const int32_t size = 8; |
61 | 8.28M | const int32_t *cospi = cospi_arr(cos_bit); |
62 | | |
63 | 8.28M | int32_t stage = 0; |
64 | 8.28M | int32_t *bf0, *bf1; |
65 | 8.28M | int32_t step[8]; |
66 | | |
67 | | // stage 0: no work here; input is consumed directly by stage 1 |
68 | | |
69 | | // stage 1: copy input into output in the order 0, 4, 2, 6, 1, 5, 3, 7, then range-check output |
70 | 8.28M | stage++; |
71 | 8.28M | bf1 = output; |
72 | 8.28M | bf1[0] = input[0]; |
73 | 8.28M | bf1[1] = input[4]; |
74 | 8.28M | bf1[2] = input[2]; |
75 | 8.28M | bf1[3] = input[6]; |
76 | 8.28M | bf1[4] = input[1]; |
77 | 8.28M | bf1[5] = input[5]; |
78 | 8.28M | bf1[6] = input[3]; |
79 | 8.28M | bf1[7] = input[7]; |
80 | 8.28M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
81 | | |
82 | | // stage 2: output -> step; elements 0..3 pass through; half_btf on pairs (4,7) with cospi[56]/cospi[8] and (5,6) with cospi[24]/cospi[40] |
83 | 8.28M | stage++; |
84 | 8.28M | bf0 = output; |
85 | 8.28M | bf1 = step; |
86 | 8.28M | bf1[0] = bf0[0]; |
87 | 8.28M | bf1[1] = bf0[1]; |
88 | 8.28M | bf1[2] = bf0[2]; |
89 | 8.28M | bf1[3] = bf0[3]; |
90 | 8.28M | bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); |
91 | 8.28M | bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); |
92 | 8.28M | bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); |
93 | 8.28M | bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); |
94 | 8.28M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
95 | | |
96 | | // stage 3: step -> output; half_btf on pairs (0,1) with cospi[32] and (2,3) with cospi[48]/cospi[16]; clamped add/subtract of adjacent pairs in 4..7 |
97 | 8.28M | stage++; |
98 | 8.28M | bf0 = step; |
99 | 8.28M | bf1 = output; |
100 | 8.28M | bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); |
101 | 8.28M | bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); |
102 | 8.28M | bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); |
103 | 8.28M | bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); |
104 | 8.28M | bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); |
105 | 8.28M | bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); |
106 | 8.28M | bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); |
107 | 8.28M | bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); |
108 | 8.28M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
109 | | |
110 | | // stage 4: output -> step; clamped add/subtract across 0..3 (0 with 3, 1 with 2); elements 4 and 7 pass through; half_btf on pair (5,6) with cospi[32] |
111 | 8.28M | stage++; |
112 | 8.28M | bf0 = output; |
113 | 8.28M | bf1 = step; |
114 | 8.28M | bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); |
115 | 8.28M | bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); |
116 | 8.28M | bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); |
117 | 8.28M | bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); |
118 | 8.28M | bf1[4] = bf0[4]; |
119 | 8.28M | bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
120 | 8.28M | bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
121 | 8.28M | bf1[7] = bf0[7]; |
122 | 8.28M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
123 | | |
124 | | // stage 5: step -> output; for i in 0..3, output[i] = clamp(step[i] + step[7-i]) and output[7-i] = clamp(step[i] - step[7-i]); no range check follows this last stage |
125 | 8.28M | stage++; |
126 | 8.28M | bf0 = step; |
127 | 8.28M | bf1 = output; |
128 | 8.28M | bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); |
129 | 8.28M | bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); |
130 | 8.28M | bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); |
131 | 8.28M | bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); |
132 | 8.28M | bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); |
133 | 8.28M | bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); |
134 | 8.28M | bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); |
135 | 8.28M | bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); |
136 | 8.28M | } |
137 | | /* av1_idct16: 16-point 1-D inverse-transform kernel (inverse DCT, going by the name). Seven stages alternate between output and the local step[16] buffer; the last stage writes the result to output[0..15]. cospi is the table returned by cospi_arr(cos_bit); stage_range[stage] is handed to av1_range_check_buf and clamp_value. input and output must be different buffers (asserted). NOTE(review): half_btf and clamp_value are defined elsewhere - presumably a cos_bit-rounded weighted sum and a bit-width clamp; confirm in av1_txfm.h. */ |
138 | | void av1_idct16(const int32_t *input, int32_t *output, int8_t cos_bit, |
139 | 7.14M | const int8_t *stage_range) { |
140 | 7.14M | assert(output != input); |
141 | 7.14M | const int32_t size = 16; |
142 | 7.14M | const int32_t *cospi = cospi_arr(cos_bit); |
143 | | |
144 | 7.14M | int32_t stage = 0; |
145 | 7.14M | int32_t *bf0, *bf1; |
146 | 7.14M | int32_t step[16]; |
147 | | |
148 | | // stage 0: no work here; input is consumed directly by stage 1 |
149 | | |
150 | | // stage 1: copy input into output in the order 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15, then range-check output |
151 | 7.14M | stage++; |
152 | 7.14M | bf1 = output; |
153 | 7.14M | bf1[0] = input[0]; |
154 | 7.14M | bf1[1] = input[8]; |
155 | 7.14M | bf1[2] = input[4]; |
156 | 7.14M | bf1[3] = input[12]; |
157 | 7.14M | bf1[4] = input[2]; |
158 | 7.14M | bf1[5] = input[10]; |
159 | 7.14M | bf1[6] = input[6]; |
160 | 7.14M | bf1[7] = input[14]; |
161 | 7.14M | bf1[8] = input[1]; |
162 | 7.14M | bf1[9] = input[9]; |
163 | 7.14M | bf1[10] = input[5]; |
164 | 7.14M | bf1[11] = input[13]; |
165 | 7.14M | bf1[12] = input[3]; |
166 | 7.14M | bf1[13] = input[11]; |
167 | 7.14M | bf1[14] = input[7]; |
168 | 7.14M | bf1[15] = input[15]; |
169 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
170 | | |
171 | | // stage 2: output -> step; elements 0..7 pass through; half_btf on pairs (8,15), (9,14), (10,13), (11,12) |
172 | 7.14M | stage++; |
173 | 7.14M | bf0 = output; |
174 | 7.14M | bf1 = step; |
175 | 7.14M | bf1[0] = bf0[0]; |
176 | 7.14M | bf1[1] = bf0[1]; |
177 | 7.14M | bf1[2] = bf0[2]; |
178 | 7.14M | bf1[3] = bf0[3]; |
179 | 7.14M | bf1[4] = bf0[4]; |
180 | 7.14M | bf1[5] = bf0[5]; |
181 | 7.14M | bf1[6] = bf0[6]; |
182 | 7.14M | bf1[7] = bf0[7]; |
183 | 7.14M | bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); |
184 | 7.14M | bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); |
185 | 7.14M | bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); |
186 | 7.14M | bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); |
187 | 7.14M | bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); |
188 | 7.14M | bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); |
189 | 7.14M | bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); |
190 | 7.14M | bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); |
191 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
192 | | |
193 | | // stage 3: step -> output; elements 0..3 pass through; half_btf on pairs (4,7) and (5,6); clamped add/subtract of adjacent pairs in 8..15 |
194 | 7.14M | stage++; |
195 | 7.14M | bf0 = step; |
196 | 7.14M | bf1 = output; |
197 | 7.14M | bf1[0] = bf0[0]; |
198 | 7.14M | bf1[1] = bf0[1]; |
199 | 7.14M | bf1[2] = bf0[2]; |
200 | 7.14M | bf1[3] = bf0[3]; |
201 | 7.14M | bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); |
202 | 7.14M | bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); |
203 | 7.14M | bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); |
204 | 7.14M | bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); |
205 | 7.14M | bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); |
206 | 7.14M | bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); |
207 | 7.14M | bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); |
208 | 7.14M | bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]); |
209 | 7.14M | bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); |
210 | 7.14M | bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); |
211 | 7.14M | bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); |
212 | 7.14M | bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); |
213 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
214 | | |
215 | | // stage 4: output -> step; half_btf on pairs (0,1) and (2,3); clamped add/subtract of adjacent pairs in 4..7; elements 8, 11, 12, 15 pass through; half_btf on pairs (9,14) and (10,13) with cospi[16]/cospi[48] |
216 | 7.14M | stage++; |
217 | 7.14M | bf0 = output; |
218 | 7.14M | bf1 = step; |
219 | 7.14M | bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); |
220 | 7.14M | bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); |
221 | 7.14M | bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); |
222 | 7.14M | bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); |
223 | 7.14M | bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); |
224 | 7.14M | bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); |
225 | 7.14M | bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); |
226 | 7.14M | bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); |
227 | 7.14M | bf1[8] = bf0[8]; |
228 | 7.14M | bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); |
229 | 7.14M | bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); |
230 | 7.14M | bf1[11] = bf0[11]; |
231 | 7.14M | bf1[12] = bf0[12]; |
232 | 7.14M | bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); |
233 | 7.14M | bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit); |
234 | 7.14M | bf1[15] = bf0[15]; |
235 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
236 | | |
237 | | // stage 5: step -> output; clamped add/subtract across 0..3; elements 4 and 7 pass through; half_btf on pair (5,6) with cospi[32]; clamped add/subtract within 8..11 and within 12..15 |
238 | 7.14M | stage++; |
239 | 7.14M | bf0 = step; |
240 | 7.14M | bf1 = output; |
241 | 7.14M | bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); |
242 | 7.14M | bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); |
243 | 7.14M | bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); |
244 | 7.14M | bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); |
245 | 7.14M | bf1[4] = bf0[4]; |
246 | 7.14M | bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
247 | 7.14M | bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
248 | 7.14M | bf1[7] = bf0[7]; |
249 | 7.14M | bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); |
250 | 7.14M | bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); |
251 | 7.14M | bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); |
252 | 7.14M | bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); |
253 | 7.14M | bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); |
254 | 7.14M | bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); |
255 | 7.14M | bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); |
256 | 7.14M | bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]); |
257 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
258 | | |
259 | | // stage 6: output -> step; for i in 0..3, step[i] = clamp(out[i] + out[7-i]) and step[7-i] = clamp(out[i] - out[7-i]); elements 8, 9, 14, 15 pass through; half_btf on pairs (10,13) and (11,12) with cospi[32] |
260 | 7.14M | stage++; |
261 | 7.14M | bf0 = output; |
262 | 7.14M | bf1 = step; |
263 | 7.14M | bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); |
264 | 7.14M | bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); |
265 | 7.14M | bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); |
266 | 7.14M | bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); |
267 | 7.14M | bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); |
268 | 7.14M | bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); |
269 | 7.14M | bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); |
270 | 7.14M | bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); |
271 | 7.14M | bf1[8] = bf0[8]; |
272 | 7.14M | bf1[9] = bf0[9]; |
273 | 7.14M | bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
274 | 7.14M | bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
275 | 7.14M | bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
276 | 7.14M | bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
277 | 7.14M | bf1[14] = bf0[14]; |
278 | 7.14M | bf1[15] = bf0[15]; |
279 | 7.14M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
280 | | |
281 | | // stage 7: step -> output; for i in 0..7, output[i] = clamp(step[i] + step[15-i]) and output[15-i] = clamp(step[i] - step[15-i]); no range check follows this last stage |
282 | 7.14M | stage++; |
283 | 7.14M | bf0 = step; |
284 | 7.14M | bf1 = output; |
285 | 7.14M | bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); |
286 | 7.14M | bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); |
287 | 7.14M | bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); |
288 | 7.14M | bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); |
289 | 7.14M | bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); |
290 | 7.14M | bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); |
291 | 7.14M | bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); |
292 | 7.14M | bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); |
293 | 7.14M | bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); |
294 | 7.14M | bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); |
295 | 7.14M | bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); |
296 | 7.14M | bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); |
297 | 7.14M | bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); |
298 | 7.14M | bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); |
299 | 7.14M | bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); |
300 | 7.14M | bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]); |
301 | 7.14M | } |
302 | | |
303 | | void av1_idct32(const int32_t *input, int32_t *output, int8_t cos_bit, |
304 | 7.32M | const int8_t *stage_range) { |
305 | 7.32M | assert(output != input); |
306 | 7.32M | const int32_t size = 32; |
307 | 7.32M | const int32_t *cospi = cospi_arr(cos_bit); |
308 | | |
309 | 7.32M | int32_t stage = 0; |
310 | 7.32M | int32_t *bf0, *bf1; |
311 | 7.32M | int32_t step[32]; |
312 | | |
313 | | // stage 0; |
314 | | |
315 | | // stage 1; |
316 | 7.32M | stage++; |
317 | 7.32M | bf1 = output; |
318 | 7.32M | bf1[0] = input[0]; |
319 | 7.32M | bf1[1] = input[16]; |
320 | 7.32M | bf1[2] = input[8]; |
321 | 7.32M | bf1[3] = input[24]; |
322 | 7.32M | bf1[4] = input[4]; |
323 | 7.32M | bf1[5] = input[20]; |
324 | 7.32M | bf1[6] = input[12]; |
325 | 7.32M | bf1[7] = input[28]; |
326 | 7.32M | bf1[8] = input[2]; |
327 | 7.32M | bf1[9] = input[18]; |
328 | 7.32M | bf1[10] = input[10]; |
329 | 7.32M | bf1[11] = input[26]; |
330 | 7.32M | bf1[12] = input[6]; |
331 | 7.32M | bf1[13] = input[22]; |
332 | 7.32M | bf1[14] = input[14]; |
333 | 7.32M | bf1[15] = input[30]; |
334 | 7.32M | bf1[16] = input[1]; |
335 | 7.32M | bf1[17] = input[17]; |
336 | 7.32M | bf1[18] = input[9]; |
337 | 7.32M | bf1[19] = input[25]; |
338 | 7.32M | bf1[20] = input[5]; |
339 | 7.32M | bf1[21] = input[21]; |
340 | 7.32M | bf1[22] = input[13]; |
341 | 7.32M | bf1[23] = input[29]; |
342 | 7.32M | bf1[24] = input[3]; |
343 | 7.32M | bf1[25] = input[19]; |
344 | 7.32M | bf1[26] = input[11]; |
345 | 7.32M | bf1[27] = input[27]; |
346 | 7.32M | bf1[28] = input[7]; |
347 | 7.32M | bf1[29] = input[23]; |
348 | 7.32M | bf1[30] = input[15]; |
349 | 7.32M | bf1[31] = input[31]; |
350 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
351 | | |
352 | | // stage 2 |
353 | 7.32M | stage++; |
354 | 7.32M | bf0 = output; |
355 | 7.32M | bf1 = step; |
356 | 7.32M | bf1[0] = bf0[0]; |
357 | 7.32M | bf1[1] = bf0[1]; |
358 | 7.32M | bf1[2] = bf0[2]; |
359 | 7.32M | bf1[3] = bf0[3]; |
360 | 7.32M | bf1[4] = bf0[4]; |
361 | 7.32M | bf1[5] = bf0[5]; |
362 | 7.32M | bf1[6] = bf0[6]; |
363 | 7.32M | bf1[7] = bf0[7]; |
364 | 7.32M | bf1[8] = bf0[8]; |
365 | 7.32M | bf1[9] = bf0[9]; |
366 | 7.32M | bf1[10] = bf0[10]; |
367 | 7.32M | bf1[11] = bf0[11]; |
368 | 7.32M | bf1[12] = bf0[12]; |
369 | 7.32M | bf1[13] = bf0[13]; |
370 | 7.32M | bf1[14] = bf0[14]; |
371 | 7.32M | bf1[15] = bf0[15]; |
372 | 7.32M | bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit); |
373 | 7.32M | bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit); |
374 | 7.32M | bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit); |
375 | 7.32M | bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit); |
376 | 7.32M | bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit); |
377 | 7.32M | bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit); |
378 | 7.32M | bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit); |
379 | 7.32M | bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit); |
380 | 7.32M | bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit); |
381 | 7.32M | bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit); |
382 | 7.32M | bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit); |
383 | 7.32M | bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit); |
384 | 7.32M | bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit); |
385 | 7.32M | bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit); |
386 | 7.32M | bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit); |
387 | 7.32M | bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit); |
388 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
389 | | |
390 | | // stage 3 |
391 | 7.32M | stage++; |
392 | 7.32M | bf0 = step; |
393 | 7.32M | bf1 = output; |
394 | 7.32M | bf1[0] = bf0[0]; |
395 | 7.32M | bf1[1] = bf0[1]; |
396 | 7.32M | bf1[2] = bf0[2]; |
397 | 7.32M | bf1[3] = bf0[3]; |
398 | 7.32M | bf1[4] = bf0[4]; |
399 | 7.32M | bf1[5] = bf0[5]; |
400 | 7.32M | bf1[6] = bf0[6]; |
401 | 7.32M | bf1[7] = bf0[7]; |
402 | 7.32M | bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); |
403 | 7.32M | bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); |
404 | 7.32M | bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); |
405 | 7.32M | bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); |
406 | 7.32M | bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); |
407 | 7.32M | bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); |
408 | 7.32M | bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); |
409 | 7.32M | bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); |
410 | 7.32M | bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]); |
411 | 7.32M | bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]); |
412 | 7.32M | bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]); |
413 | 7.32M | bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]); |
414 | 7.32M | bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]); |
415 | 7.32M | bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]); |
416 | 7.32M | bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]); |
417 | 7.32M | bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]); |
418 | 7.32M | bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]); |
419 | 7.32M | bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]); |
420 | 7.32M | bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]); |
421 | 7.32M | bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]); |
422 | 7.32M | bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]); |
423 | 7.32M | bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]); |
424 | 7.32M | bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]); |
425 | 7.32M | bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]); |
426 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
427 | | |
428 | | // stage 4 |
429 | 7.32M | stage++; |
430 | 7.32M | bf0 = output; |
431 | 7.32M | bf1 = step; |
432 | 7.32M | bf1[0] = bf0[0]; |
433 | 7.32M | bf1[1] = bf0[1]; |
434 | 7.32M | bf1[2] = bf0[2]; |
435 | 7.32M | bf1[3] = bf0[3]; |
436 | 7.32M | bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); |
437 | 7.32M | bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); |
438 | 7.32M | bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); |
439 | 7.32M | bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); |
440 | 7.32M | bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); |
441 | 7.32M | bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); |
442 | 7.32M | bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); |
443 | 7.32M | bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]); |
444 | 7.32M | bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); |
445 | 7.32M | bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); |
446 | 7.32M | bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); |
447 | 7.32M | bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); |
448 | 7.32M | bf1[16] = bf0[16]; |
449 | 7.32M | bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); |
450 | 7.32M | bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); |
451 | 7.32M | bf1[19] = bf0[19]; |
452 | 7.32M | bf1[20] = bf0[20]; |
453 | 7.32M | bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); |
454 | 7.32M | bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); |
455 | 7.32M | bf1[23] = bf0[23]; |
456 | 7.32M | bf1[24] = bf0[24]; |
457 | 7.32M | bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit); |
458 | 7.32M | bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit); |
459 | 7.32M | bf1[27] = bf0[27]; |
460 | 7.32M | bf1[28] = bf0[28]; |
461 | 7.32M | bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit); |
462 | 7.32M | bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit); |
463 | 7.32M | bf1[31] = bf0[31]; |
464 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
465 | | |
466 | | // stage 5 |
467 | 7.32M | stage++; |
468 | 7.32M | bf0 = step; |
469 | 7.32M | bf1 = output; |
470 | 7.32M | bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); |
471 | 7.32M | bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); |
472 | 7.32M | bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); |
473 | 7.32M | bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); |
474 | 7.32M | bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); |
475 | 7.32M | bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); |
476 | 7.32M | bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); |
477 | 7.32M | bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); |
478 | 7.32M | bf1[8] = bf0[8]; |
479 | 7.32M | bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); |
480 | 7.32M | bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); |
481 | 7.32M | bf1[11] = bf0[11]; |
482 | 7.32M | bf1[12] = bf0[12]; |
483 | 7.32M | bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); |
484 | 7.32M | bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit); |
485 | 7.32M | bf1[15] = bf0[15]; |
486 | 7.32M | bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]); |
487 | 7.32M | bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]); |
488 | 7.32M | bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]); |
489 | 7.32M | bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]); |
490 | 7.32M | bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]); |
491 | 7.32M | bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]); |
492 | 7.32M | bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]); |
493 | 7.32M | bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]); |
494 | 7.32M | bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]); |
495 | 7.32M | bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]); |
496 | 7.32M | bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]); |
497 | 7.32M | bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]); |
498 | 7.32M | bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]); |
499 | 7.32M | bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]); |
500 | 7.32M | bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]); |
501 | 7.32M | bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]); |
502 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
503 | | |
504 | | // stage 6 |
505 | 7.32M | stage++; |
506 | 7.32M | bf0 = output; |
507 | 7.32M | bf1 = step; |
508 | 7.32M | bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); |
509 | 7.32M | bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); |
510 | 7.32M | bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); |
511 | 7.32M | bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); |
512 | 7.32M | bf1[4] = bf0[4]; |
513 | 7.32M | bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
514 | 7.32M | bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
515 | 7.32M | bf1[7] = bf0[7]; |
516 | 7.32M | bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); |
517 | 7.32M | bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); |
518 | 7.32M | bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); |
519 | 7.32M | bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); |
520 | 7.32M | bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); |
521 | 7.32M | bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); |
522 | 7.32M | bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); |
523 | 7.32M | bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]); |
524 | 7.32M | bf1[16] = bf0[16]; |
525 | 7.32M | bf1[17] = bf0[17]; |
526 | 7.32M | bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); |
527 | 7.32M | bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); |
528 | 7.32M | bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); |
529 | 7.32M | bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); |
530 | 7.32M | bf1[22] = bf0[22]; |
531 | 7.32M | bf1[23] = bf0[23]; |
532 | 7.32M | bf1[24] = bf0[24]; |
533 | 7.32M | bf1[25] = bf0[25]; |
534 | 7.32M | bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit); |
535 | 7.32M | bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit); |
536 | 7.32M | bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit); |
537 | 7.32M | bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit); |
538 | 7.32M | bf1[30] = bf0[30]; |
539 | 7.32M | bf1[31] = bf0[31]; |
540 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
541 | | |
542 | | // stage 7 |
543 | 7.32M | stage++; |
544 | 7.32M | bf0 = step; |
545 | 7.32M | bf1 = output; |
546 | 7.32M | bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); |
547 | 7.32M | bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); |
548 | 7.32M | bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); |
549 | 7.32M | bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); |
550 | 7.32M | bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); |
551 | 7.32M | bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); |
552 | 7.32M | bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); |
553 | 7.32M | bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); |
554 | 7.32M | bf1[8] = bf0[8]; |
555 | 7.32M | bf1[9] = bf0[9]; |
556 | 7.32M | bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
557 | 7.32M | bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
558 | 7.32M | bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
559 | 7.32M | bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
560 | 7.32M | bf1[14] = bf0[14]; |
561 | 7.32M | bf1[15] = bf0[15]; |
562 | 7.32M | bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]); |
563 | 7.32M | bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]); |
564 | 7.32M | bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]); |
565 | 7.32M | bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]); |
566 | 7.32M | bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]); |
567 | 7.32M | bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]); |
568 | 7.32M | bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]); |
569 | 7.32M | bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]); |
570 | 7.32M | bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]); |
571 | 7.32M | bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]); |
572 | 7.32M | bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]); |
573 | 7.32M | bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]); |
574 | 7.32M | bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]); |
575 | 7.32M | bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]); |
576 | 7.32M | bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]); |
577 | 7.32M | bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]); |
578 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
579 | | |
580 | | // stage 8 |
581 | 7.32M | stage++; |
582 | 7.32M | bf0 = output; |
583 | 7.32M | bf1 = step; |
584 | 7.32M | bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); |
585 | 7.32M | bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); |
586 | 7.32M | bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); |
587 | 7.32M | bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); |
588 | 7.32M | bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); |
589 | 7.32M | bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); |
590 | 7.32M | bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); |
591 | 7.32M | bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); |
592 | 7.32M | bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); |
593 | 7.32M | bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); |
594 | 7.32M | bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); |
595 | 7.32M | bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); |
596 | 7.32M | bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); |
597 | 7.32M | bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); |
598 | 7.32M | bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); |
599 | 7.32M | bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]); |
600 | 7.32M | bf1[16] = bf0[16]; |
601 | 7.32M | bf1[17] = bf0[17]; |
602 | 7.32M | bf1[18] = bf0[18]; |
603 | 7.32M | bf1[19] = bf0[19]; |
604 | 7.32M | bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); |
605 | 7.32M | bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); |
606 | 7.32M | bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); |
607 | 7.32M | bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); |
608 | 7.32M | bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); |
609 | 7.32M | bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); |
610 | 7.32M | bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); |
611 | 7.32M | bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); |
612 | 7.32M | bf1[28] = bf0[28]; |
613 | 7.32M | bf1[29] = bf0[29]; |
614 | 7.32M | bf1[30] = bf0[30]; |
615 | 7.32M | bf1[31] = bf0[31]; |
616 | 7.32M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
617 | | |
618 | | // stage 9 |
619 | 7.32M | stage++; |
620 | 7.32M | bf0 = step; |
621 | 7.32M | bf1 = output; |
622 | 7.32M | bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]); |
623 | 7.32M | bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]); |
624 | 7.32M | bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]); |
625 | 7.32M | bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]); |
626 | 7.32M | bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]); |
627 | 7.32M | bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]); |
628 | 7.32M | bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]); |
629 | 7.32M | bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]); |
630 | 7.32M | bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]); |
631 | 7.32M | bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]); |
632 | 7.32M | bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]); |
633 | 7.32M | bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]); |
634 | 7.32M | bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]); |
635 | 7.32M | bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]); |
636 | 7.32M | bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]); |
637 | 7.32M | bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]); |
638 | 7.32M | bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]); |
639 | 7.32M | bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]); |
640 | 7.32M | bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]); |
641 | 7.32M | bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]); |
642 | 7.32M | bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]); |
643 | 7.32M | bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]); |
644 | 7.32M | bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]); |
645 | 7.32M | bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]); |
646 | 7.32M | bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]); |
647 | 7.32M | bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]); |
648 | 7.32M | bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]); |
649 | 7.32M | bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]); |
650 | 7.32M | bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]); |
651 | 7.32M | bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]); |
652 | 7.32M | bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]); |
653 | 7.32M | bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]); |
654 | 7.32M | } |
655 | | |
// 4-point inverse ADST (per the function name).
//
// input:       four coefficients; all are read before anything is written.
// output:      receives the four results.
// cos_bit:     precision used to select the sinpi table and for the final
//              round_shift of every output.
// stage_range: per-stage bit budgets; entries 1..6 are read and forwarded to
//              range_check_value.
//
// When all four inputs are zero the outputs are set to zero and the function
// returns without consulting the sinpi table values or stage_range.
void av1_iadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  const int bit = cos_bit;
  const int32_t *sinpi = sinpi_arr(bit);

  const int32_t in0 = input[0];
  const int32_t in1 = input[1];
  const int32_t in2 = input[2];
  const int32_t in3 = input[3];

  // Shortcut: an all-zero input produces an all-zero output.
  if ((in0 | in1 | in2 | in3) == 0) {
    output[0] = 0;
    output[1] = 0;
    output[2] = 0;
    output[3] = 0;
    return;
  }

  // The arithmetic below relies on this identity between table entries.
  assert(sinpi[1] + sinpi[2] == sinpi[4]);

  // stage 1: products of the inputs with the sinpi table entries.
  const int32_t p0 = range_check_value(sinpi[1] * in0, stage_range[1] + bit);
  const int32_t p1 = range_check_value(sinpi[2] * in0, stage_range[1] + bit);
  const int32_t p2 = range_check_value(sinpi[3] * in1, stage_range[1] + bit);
  const int32_t p3 = range_check_value(sinpi[4] * in2, stage_range[1] + bit);
  const int32_t p4 = range_check_value(sinpi[1] * in2, stage_range[1] + bit);
  const int32_t p5 = range_check_value(sinpi[2] * in3, stage_range[1] + bit);
  const int32_t p6 = range_check_value(sinpi[4] * in3, stage_range[1] + bit);

  // stage 2
  // NOTICE: (in0 - in2) here may use one extra bit compared to the
  // opt_range_row/col specified in av1_gen_inv_stage_range()
  const int32_t mix = range_check_value((in0 - in2) + in3, stage_range[2]);

  // stage 3
  int32_t acc0 = range_check_value(p0 + p3, stage_range[3] + bit);
  int32_t acc1 = range_check_value(p1 - p4, stage_range[3] + bit);
  const int32_t shared = range_check_value(p2, stage_range[3] + bit);
  const int32_t scaled_mix =
      range_check_value(sinpi[3] * mix, stage_range[3] + bit);

  // stage 4
  acc0 = range_check_value(acc0 + p5, stage_range[4] + bit);
  acc1 = range_check_value(acc1 - p6, stage_range[4] + bit);

  // stage 5
  const int32_t r0 = range_check_value(acc0 + shared, stage_range[5] + bit);
  const int32_t r1 = range_check_value(acc1 + shared, stage_range[5] + bit);
  const int32_t r2 = range_check_value(scaled_mix, stage_range[5] + bit);
  int32_t r3 = range_check_value(acc0 + acc1, stage_range[5] + bit);

  // stage 6
  r3 = range_check_value(r3 - shared, stage_range[6] + bit);

  // Drop the `bit` fractional bits carried by the table products.
  output[0] = round_shift(r0, bit);
  output[1] = round_shift(r1, bit);
  output[2] = round_shift(r2, bit);
  output[3] = round_shift(r3, bit);
}
712 | | |
// 8-point inverse ADST (per the function name), computed in seven stages that
// alternate between `output` and a local scratch buffer.
//
// input:       eight coefficients; must not alias `output` (asserted).
// output:      receives the eight results; also used as a work buffer.
// cos_bit:     selects the cosine table and is forwarded to half_btf.
// stage_range: per-stage bit budgets; entries 1..6 are read. They are passed
//              to clamp_value on the add/sub stages and to
//              av1_range_check_buf after each of stages 1..6.
void av1_iadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 8;
  const int32_t *cospi = cospi_arr(cos_bit);

  // Stage-1 slot i is loaded from input[kInputOrder[i]].
  static const int kInputOrder[8] = { 7, 0, 5, 2, 3, 4, 1, 6 };
  // Final slot i is taken from scratch[kOutputOrder[i]]; odd slots are negated.
  static const int kOutputOrder[8] = { 0, 4, 6, 2, 3, 7, 5, 1 };

  int32_t stage = 0;
  int32_t scratch[8];

  // stage 0;

  // stage 1: reorder the input into `output`.
  ++stage;
  for (int i = 0; i < 8; ++i) {
    output[i] = input[kInputOrder[i]];
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 2: one rotation per adjacent pair, with table indices
  // (4, 60), (20, 44), (36, 28), (52, 12).
  ++stage;
  for (int k = 0; k < 4; ++k) {
    const int w_a = 4 + 16 * k;
    const int w_b = 64 - w_a;
    const int32_t even = output[2 * k];
    const int32_t odd = output[2 * k + 1];
    scratch[2 * k] = half_btf(cospi[w_a], even, cospi[w_b], odd, cos_bit);
    scratch[2 * k + 1] = half_btf(cospi[w_b], even, -cospi[w_a], odd, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 3: sums into the lower half, then differences into the upper half.
  ++stage;
  for (int i = 0; i < 4; ++i) {
    output[i] = clamp_value(scratch[i] + scratch[i + 4], stage_range[stage]);
  }
  for (int i = 0; i < 4; ++i) {
    output[i + 4] =
        clamp_value(scratch[i] - scratch[i + 4], stage_range[stage]);
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 4: lower half passes through, upper half is rotated.
  ++stage;
  for (int i = 0; i < 4; ++i) {
    scratch[i] = output[i];
  }
  scratch[4] = half_btf(cospi[16], output[4], cospi[48], output[5], cos_bit);
  scratch[5] = half_btf(cospi[48], output[4], -cospi[16], output[5], cos_bit);
  scratch[6] = half_btf(-cospi[48], output[6], cospi[16], output[7], cos_bit);
  scratch[7] = half_btf(cospi[16], output[6], cospi[48], output[7], cos_bit);
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 5: within each group of four, sums then differences at distance 2.
  ++stage;
  for (int base = 0; base < 8; base += 4) {
    for (int j = 0; j < 2; ++j) {
      output[base + j] = clamp_value(scratch[base + j] + scratch[base + j + 2],
                                     stage_range[stage]);
    }
    for (int j = 0; j < 2; ++j) {
      output[base + 2 + j] = clamp_value(
          scratch[base + j] - scratch[base + j + 2], stage_range[stage]);
    }
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 6: within each group of four, the first two pass through and the
  // last two are rotated with cospi[32].
  ++stage;
  for (int base = 0; base < 8; base += 4) {
    scratch[base] = output[base];
    scratch[base + 1] = output[base + 1];
    scratch[base + 2] = half_btf(cospi[32], output[base + 2], cospi[32],
                                 output[base + 3], cos_bit);
    scratch[base + 3] = half_btf(cospi[32], output[base + 2], -cospi[32],
                                 output[base + 3], cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 7: final reordering with alternating sign. `stage` is not advanced
  // and no range check follows this stage.
  for (int i = 0; i < 8; ++i) {
    const int32_t v = scratch[kOutputOrder[i]];
    output[i] = (i & 1) ? -v : v;
  }
}
820 | | |
// 16-point inverse ADST (per the function name), computed in nine stages that
// alternate between `output` and a local scratch buffer.
//
// input:       sixteen coefficients; must not alias `output` (asserted).
// output:      receives the sixteen results; also used as a work buffer.
// cos_bit:     selects the cosine table and is forwarded to half_btf.
// stage_range: per-stage bit budgets; entries 1..8 are read. They are passed
//              to clamp_value on the add/sub stages and to
//              av1_range_check_buf after each of stages 1..8.
void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                 const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 16;
  const int32_t *cospi = cospi_arr(cos_bit);

  // Stage-1 slot i is loaded from input[kInputOrder[i]].
  static const int kInputOrder[16] = { 15, 0, 13, 2, 11, 4,  9, 6,
                                       7,  8, 5,  10, 3, 12, 1, 14 };
  // Final slot i is taken from scratch[kOutputOrder[i]]; odd slots are negated.
  static const int kOutputOrder[16] = { 0, 8,  12, 4, 6, 14, 10, 2,
                                        3, 11, 15, 7, 5, 13, 9,  1 };

  int32_t stage = 0;
  int32_t scratch[16];

  // stage 0;

  // stage 1: reorder the input into `output`.
  ++stage;
  for (int i = 0; i < 16; ++i) {
    output[i] = input[kInputOrder[i]];
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 2: one rotation per adjacent pair, with table indices
  // (2, 62), (10, 54), (18, 46), (26, 38), (34, 30), (42, 22), (50, 14),
  // (58, 6).
  ++stage;
  for (int k = 0; k < 8; ++k) {
    const int w_a = 2 + 8 * k;
    const int w_b = 64 - w_a;
    const int32_t even = output[2 * k];
    const int32_t odd = output[2 * k + 1];
    scratch[2 * k] = half_btf(cospi[w_a], even, cospi[w_b], odd, cos_bit);
    scratch[2 * k + 1] = half_btf(cospi[w_b], even, -cospi[w_a], odd, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 3: sums into the lower half, then differences into the upper half.
  ++stage;
  for (int i = 0; i < 8; ++i) {
    output[i] = clamp_value(scratch[i] + scratch[i + 8], stage_range[stage]);
  }
  for (int i = 0; i < 8; ++i) {
    output[i + 8] =
        clamp_value(scratch[i] - scratch[i + 8], stage_range[stage]);
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 4: lower half passes through, upper half is rotated pairwise.
  ++stage;
  for (int i = 0; i < 8; ++i) {
    scratch[i] = output[i];
  }
  scratch[8] = half_btf(cospi[8], output[8], cospi[56], output[9], cos_bit);
  scratch[9] = half_btf(cospi[56], output[8], -cospi[8], output[9], cos_bit);
  scratch[10] = half_btf(cospi[40], output[10], cospi[24], output[11], cos_bit);
  scratch[11] =
      half_btf(cospi[24], output[10], -cospi[40], output[11], cos_bit);
  scratch[12] = half_btf(-cospi[56], output[12], cospi[8], output[13], cos_bit);
  scratch[13] = half_btf(cospi[8], output[12], cospi[56], output[13], cos_bit);
  scratch[14] =
      half_btf(-cospi[24], output[14], cospi[40], output[15], cos_bit);
  scratch[15] = half_btf(cospi[40], output[14], cospi[24], output[15], cos_bit);
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 5: within each group of eight, sums then differences at distance 4.
  ++stage;
  for (int base = 0; base < 16; base += 8) {
    for (int j = 0; j < 4; ++j) {
      output[base + j] = clamp_value(scratch[base + j] + scratch[base + j + 4],
                                     stage_range[stage]);
    }
    for (int j = 0; j < 4; ++j) {
      output[base + 4 + j] = clamp_value(
          scratch[base + j] - scratch[base + j + 4], stage_range[stage]);
    }
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 6: within each group of eight, the first four pass through and the
  // last four are rotated with cospi[16]/cospi[48].
  ++stage;
  for (int base = 0; base < 16; base += 8) {
    for (int j = 0; j < 4; ++j) {
      scratch[base + j] = output[base + j];
    }
    scratch[base + 4] = half_btf(cospi[16], output[base + 4], cospi[48],
                                 output[base + 5], cos_bit);
    scratch[base + 5] = half_btf(cospi[48], output[base + 4], -cospi[16],
                                 output[base + 5], cos_bit);
    scratch[base + 6] = half_btf(-cospi[48], output[base + 6], cospi[16],
                                 output[base + 7], cos_bit);
    scratch[base + 7] = half_btf(cospi[16], output[base + 6], cospi[48],
                                 output[base + 7], cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 7: within each group of four, sums then differences at distance 2.
  ++stage;
  for (int base = 0; base < 16; base += 4) {
    for (int j = 0; j < 2; ++j) {
      output[base + j] = clamp_value(scratch[base + j] + scratch[base + j + 2],
                                     stage_range[stage]);
    }
    for (int j = 0; j < 2; ++j) {
      output[base + 2 + j] = clamp_value(
          scratch[base + j] - scratch[base + j + 2], stage_range[stage]);
    }
  }
  av1_range_check_buf(stage, input, output, size, stage_range[stage]);

  // stage 8: within each group of four, the first two pass through and the
  // last two are rotated with cospi[32].
  ++stage;
  for (int base = 0; base < 16; base += 4) {
    scratch[base] = output[base];
    scratch[base + 1] = output[base + 1];
    scratch[base + 2] = half_btf(cospi[32], output[base + 2], cospi[32],
                                 output[base + 3], cos_bit);
    scratch[base + 3] = half_btf(cospi[32], output[base + 2], -cospi[32],
                                 output[base + 3], cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, size, stage_range[stage]);

  // stage 9: final reordering with alternating sign. `stage` is not advanced
  // and no range check follows this stage.
  for (int i = 0; i < 16; ++i) {
    const int32_t v = scratch[kOutputOrder[i]];
    output[i] = (i & 1) ? -v : v;
  }
}
1028 | | |
1029 | | void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit, |
1030 | 649k | const int8_t *stage_range) { |
1031 | 649k | (void)cos_bit; |
1032 | 649k | (void)stage_range; |
1033 | 3.24M | for (int i = 0; i < 4; ++i) { |
1034 | 2.59M | output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits); |
1035 | 2.59M | } |
1036 | 649k | assert(stage_range[0] + NewSqrt2Bits <= 32); |
1037 | 649k | } |
1038 | | |
// 8-point inverse identity transform: every output is twice the matching
// input. The doubling is done in 64-bit arithmetic and the result is then
// converted back to int32_t.
//
// cos_bit and stage_range are unused.
void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  const int32_t *src = input;
  int32_t *dst = output;
  for (int remaining = 8; remaining > 0; --remaining) {
    const int64_t doubled = (int64_t)(*src) * 2;
    *dst = (int32_t)doubled;
    ++src;
    ++dst;
  }
}
1045 | | |
1046 | | void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit, |
1047 | 497k | const int8_t *stage_range) { |
1048 | 497k | (void)cos_bit; |
1049 | 497k | (void)stage_range; |
1050 | 8.45M | for (int i = 0; i < 16; ++i) |
1051 | 7.96M | output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits); |
1052 | 497k | assert(stage_range[0] + NewSqrt2Bits <= 32); |
1053 | 497k | } |
1054 | | |
// 32-point inverse identity transform: every output is four times the
// matching input. The multiplication is done in 64-bit arithmetic and the
// result is then converted back to int32_t.
//
// cos_bit and stage_range are unused.
void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  const int32_t *src = input;
  int32_t *dst = output;
  for (int remaining = 32; remaining > 0; --remaining) {
    const int64_t quadrupled = (int64_t)(*src) * 4;
    *dst = (int32_t)quadrupled;
    ++src;
    ++dst;
  }
}
1061 | | |
1062 | | void av1_idct64(const int32_t *input, int32_t *output, int8_t cos_bit, |
1063 | 3.98M | const int8_t *stage_range) { |
1064 | 3.98M | assert(output != input); |
1065 | 3.98M | const int32_t size = 64; |
1066 | 3.98M | const int32_t *cospi = cospi_arr(cos_bit); |
1067 | | |
1068 | 3.98M | int32_t stage = 0; |
1069 | 3.98M | int32_t *bf0, *bf1; |
1070 | 3.98M | int32_t step[64]; |
1071 | | |
1072 | | // stage 0; |
1073 | | |
1074 | | // stage 1; |
1075 | 3.98M | stage++; |
1076 | 3.98M | bf1 = output; |
1077 | 3.98M | bf1[0] = input[0]; |
1078 | 3.98M | bf1[1] = input[32]; |
1079 | 3.98M | bf1[2] = input[16]; |
1080 | 3.98M | bf1[3] = input[48]; |
1081 | 3.98M | bf1[4] = input[8]; |
1082 | 3.98M | bf1[5] = input[40]; |
1083 | 3.98M | bf1[6] = input[24]; |
1084 | 3.98M | bf1[7] = input[56]; |
1085 | 3.98M | bf1[8] = input[4]; |
1086 | 3.98M | bf1[9] = input[36]; |
1087 | 3.98M | bf1[10] = input[20]; |
1088 | 3.98M | bf1[11] = input[52]; |
1089 | 3.98M | bf1[12] = input[12]; |
1090 | 3.98M | bf1[13] = input[44]; |
1091 | 3.98M | bf1[14] = input[28]; |
1092 | 3.98M | bf1[15] = input[60]; |
1093 | 3.98M | bf1[16] = input[2]; |
1094 | 3.98M | bf1[17] = input[34]; |
1095 | 3.98M | bf1[18] = input[18]; |
1096 | 3.98M | bf1[19] = input[50]; |
1097 | 3.98M | bf1[20] = input[10]; |
1098 | 3.98M | bf1[21] = input[42]; |
1099 | 3.98M | bf1[22] = input[26]; |
1100 | 3.98M | bf1[23] = input[58]; |
1101 | 3.98M | bf1[24] = input[6]; |
1102 | 3.98M | bf1[25] = input[38]; |
1103 | 3.98M | bf1[26] = input[22]; |
1104 | 3.98M | bf1[27] = input[54]; |
1105 | 3.98M | bf1[28] = input[14]; |
1106 | 3.98M | bf1[29] = input[46]; |
1107 | 3.98M | bf1[30] = input[30]; |
1108 | 3.98M | bf1[31] = input[62]; |
1109 | 3.98M | bf1[32] = input[1]; |
1110 | 3.98M | bf1[33] = input[33]; |
1111 | 3.98M | bf1[34] = input[17]; |
1112 | 3.98M | bf1[35] = input[49]; |
1113 | 3.98M | bf1[36] = input[9]; |
1114 | 3.98M | bf1[37] = input[41]; |
1115 | 3.98M | bf1[38] = input[25]; |
1116 | 3.98M | bf1[39] = input[57]; |
1117 | 3.98M | bf1[40] = input[5]; |
1118 | 3.98M | bf1[41] = input[37]; |
1119 | 3.98M | bf1[42] = input[21]; |
1120 | 3.98M | bf1[43] = input[53]; |
1121 | 3.98M | bf1[44] = input[13]; |
1122 | 3.98M | bf1[45] = input[45]; |
1123 | 3.98M | bf1[46] = input[29]; |
1124 | 3.98M | bf1[47] = input[61]; |
1125 | 3.98M | bf1[48] = input[3]; |
1126 | 3.98M | bf1[49] = input[35]; |
1127 | 3.98M | bf1[50] = input[19]; |
1128 | 3.98M | bf1[51] = input[51]; |
1129 | 3.98M | bf1[52] = input[11]; |
1130 | 3.98M | bf1[53] = input[43]; |
1131 | 3.98M | bf1[54] = input[27]; |
1132 | 3.98M | bf1[55] = input[59]; |
1133 | 3.98M | bf1[56] = input[7]; |
1134 | 3.98M | bf1[57] = input[39]; |
1135 | 3.98M | bf1[58] = input[23]; |
1136 | 3.98M | bf1[59] = input[55]; |
1137 | 3.98M | bf1[60] = input[15]; |
1138 | 3.98M | bf1[61] = input[47]; |
1139 | 3.98M | bf1[62] = input[31]; |
1140 | 3.98M | bf1[63] = input[63]; |
1141 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1142 | | |
1143 | | // stage 2 |
1144 | 3.98M | stage++; |
1145 | 3.98M | bf0 = output; |
1146 | 3.98M | bf1 = step; |
1147 | 3.98M | bf1[0] = bf0[0]; |
1148 | 3.98M | bf1[1] = bf0[1]; |
1149 | 3.98M | bf1[2] = bf0[2]; |
1150 | 3.98M | bf1[3] = bf0[3]; |
1151 | 3.98M | bf1[4] = bf0[4]; |
1152 | 3.98M | bf1[5] = bf0[5]; |
1153 | 3.98M | bf1[6] = bf0[6]; |
1154 | 3.98M | bf1[7] = bf0[7]; |
1155 | 3.98M | bf1[8] = bf0[8]; |
1156 | 3.98M | bf1[9] = bf0[9]; |
1157 | 3.98M | bf1[10] = bf0[10]; |
1158 | 3.98M | bf1[11] = bf0[11]; |
1159 | 3.98M | bf1[12] = bf0[12]; |
1160 | 3.98M | bf1[13] = bf0[13]; |
1161 | 3.98M | bf1[14] = bf0[14]; |
1162 | 3.98M | bf1[15] = bf0[15]; |
1163 | 3.98M | bf1[16] = bf0[16]; |
1164 | 3.98M | bf1[17] = bf0[17]; |
1165 | 3.98M | bf1[18] = bf0[18]; |
1166 | 3.98M | bf1[19] = bf0[19]; |
1167 | 3.98M | bf1[20] = bf0[20]; |
1168 | 3.98M | bf1[21] = bf0[21]; |
1169 | 3.98M | bf1[22] = bf0[22]; |
1170 | 3.98M | bf1[23] = bf0[23]; |
1171 | 3.98M | bf1[24] = bf0[24]; |
1172 | 3.98M | bf1[25] = bf0[25]; |
1173 | 3.98M | bf1[26] = bf0[26]; |
1174 | 3.98M | bf1[27] = bf0[27]; |
1175 | 3.98M | bf1[28] = bf0[28]; |
1176 | 3.98M | bf1[29] = bf0[29]; |
1177 | 3.98M | bf1[30] = bf0[30]; |
1178 | 3.98M | bf1[31] = bf0[31]; |
1179 | 3.98M | bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit); |
1180 | 3.98M | bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit); |
1181 | 3.98M | bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit); |
1182 | 3.98M | bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit); |
1183 | 3.98M | bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit); |
1184 | 3.98M | bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit); |
1185 | 3.98M | bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit); |
1186 | 3.98M | bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit); |
1187 | 3.98M | bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit); |
1188 | 3.98M | bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit); |
1189 | 3.98M | bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit); |
1190 | 3.98M | bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit); |
1191 | 3.98M | bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit); |
1192 | 3.98M | bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit); |
1193 | 3.98M | bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit); |
1194 | 3.98M | bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit); |
1195 | 3.98M | bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit); |
1196 | 3.98M | bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit); |
1197 | 3.98M | bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit); |
1198 | 3.98M | bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit); |
1199 | 3.98M | bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit); |
1200 | 3.98M | bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit); |
1201 | 3.98M | bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit); |
1202 | 3.98M | bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit); |
1203 | 3.98M | bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit); |
1204 | 3.98M | bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit); |
1205 | 3.98M | bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit); |
1206 | 3.98M | bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit); |
1207 | 3.98M | bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit); |
1208 | 3.98M | bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit); |
1209 | 3.98M | bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit); |
1210 | 3.98M | bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit); |
1211 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1212 | | |
1213 | | // stage 3 |
1214 | 3.98M | stage++; |
1215 | 3.98M | bf0 = step; |
1216 | 3.98M | bf1 = output; |
1217 | 3.98M | bf1[0] = bf0[0]; |
1218 | 3.98M | bf1[1] = bf0[1]; |
1219 | 3.98M | bf1[2] = bf0[2]; |
1220 | 3.98M | bf1[3] = bf0[3]; |
1221 | 3.98M | bf1[4] = bf0[4]; |
1222 | 3.98M | bf1[5] = bf0[5]; |
1223 | 3.98M | bf1[6] = bf0[6]; |
1224 | 3.98M | bf1[7] = bf0[7]; |
1225 | 3.98M | bf1[8] = bf0[8]; |
1226 | 3.98M | bf1[9] = bf0[9]; |
1227 | 3.98M | bf1[10] = bf0[10]; |
1228 | 3.98M | bf1[11] = bf0[11]; |
1229 | 3.98M | bf1[12] = bf0[12]; |
1230 | 3.98M | bf1[13] = bf0[13]; |
1231 | 3.98M | bf1[14] = bf0[14]; |
1232 | 3.98M | bf1[15] = bf0[15]; |
1233 | 3.98M | bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit); |
1234 | 3.98M | bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit); |
1235 | 3.98M | bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit); |
1236 | 3.98M | bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit); |
1237 | 3.98M | bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit); |
1238 | 3.98M | bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit); |
1239 | 3.98M | bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit); |
1240 | 3.98M | bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit); |
1241 | 3.98M | bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit); |
1242 | 3.98M | bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit); |
1243 | 3.98M | bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit); |
1244 | 3.98M | bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit); |
1245 | 3.98M | bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit); |
1246 | 3.98M | bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit); |
1247 | 3.98M | bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit); |
1248 | 3.98M | bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit); |
1249 | 3.98M | bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]); |
1250 | 3.98M | bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]); |
1251 | 3.98M | bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]); |
1252 | 3.98M | bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]); |
1253 | 3.98M | bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]); |
1254 | 3.98M | bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]); |
1255 | 3.98M | bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]); |
1256 | 3.98M | bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]); |
1257 | 3.98M | bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]); |
1258 | 3.98M | bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]); |
1259 | 3.98M | bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]); |
1260 | 3.98M | bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]); |
1261 | 3.98M | bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]); |
1262 | 3.98M | bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]); |
1263 | 3.98M | bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]); |
1264 | 3.98M | bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]); |
1265 | 3.98M | bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]); |
1266 | 3.98M | bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]); |
1267 | 3.98M | bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]); |
1268 | 3.98M | bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]); |
1269 | 3.98M | bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]); |
1270 | 3.98M | bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]); |
1271 | 3.98M | bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]); |
1272 | 3.98M | bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]); |
1273 | 3.98M | bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]); |
1274 | 3.98M | bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]); |
1275 | 3.98M | bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]); |
1276 | 3.98M | bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]); |
1277 | 3.98M | bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]); |
1278 | 3.98M | bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]); |
1279 | 3.98M | bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]); |
1280 | 3.98M | bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]); |
1281 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1282 | | |
1283 | | // stage 4 |
1284 | 3.98M | stage++; |
1285 | 3.98M | bf0 = output; |
1286 | 3.98M | bf1 = step; |
1287 | 3.98M | bf1[0] = bf0[0]; |
1288 | 3.98M | bf1[1] = bf0[1]; |
1289 | 3.98M | bf1[2] = bf0[2]; |
1290 | 3.98M | bf1[3] = bf0[3]; |
1291 | 3.98M | bf1[4] = bf0[4]; |
1292 | 3.98M | bf1[5] = bf0[5]; |
1293 | 3.98M | bf1[6] = bf0[6]; |
1294 | 3.98M | bf1[7] = bf0[7]; |
1295 | 3.98M | bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); |
1296 | 3.98M | bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); |
1297 | 3.98M | bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); |
1298 | 3.98M | bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); |
1299 | 3.98M | bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); |
1300 | 3.98M | bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); |
1301 | 3.98M | bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); |
1302 | 3.98M | bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); |
1303 | 3.98M | bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]); |
1304 | 3.98M | bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]); |
1305 | 3.98M | bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]); |
1306 | 3.98M | bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]); |
1307 | 3.98M | bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]); |
1308 | 3.98M | bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]); |
1309 | 3.98M | bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]); |
1310 | 3.98M | bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]); |
1311 | 3.98M | bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]); |
1312 | 3.98M | bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]); |
1313 | 3.98M | bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]); |
1314 | 3.98M | bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]); |
1315 | 3.98M | bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]); |
1316 | 3.98M | bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]); |
1317 | 3.98M | bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]); |
1318 | 3.98M | bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]); |
1319 | 3.98M | bf1[32] = bf0[32]; |
1320 | 3.98M | bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit); |
1321 | 3.98M | bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit); |
1322 | 3.98M | bf1[35] = bf0[35]; |
1323 | 3.98M | bf1[36] = bf0[36]; |
1324 | 3.98M | bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit); |
1325 | 3.98M | bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit); |
1326 | 3.98M | bf1[39] = bf0[39]; |
1327 | 3.98M | bf1[40] = bf0[40]; |
1328 | 3.98M | bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit); |
1329 | 3.98M | bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit); |
1330 | 3.98M | bf1[43] = bf0[43]; |
1331 | 3.98M | bf1[44] = bf0[44]; |
1332 | 3.98M | bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit); |
1333 | 3.98M | bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit); |
1334 | 3.98M | bf1[47] = bf0[47]; |
1335 | 3.98M | bf1[48] = bf0[48]; |
1336 | 3.98M | bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit); |
1337 | 3.98M | bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit); |
1338 | 3.98M | bf1[51] = bf0[51]; |
1339 | 3.98M | bf1[52] = bf0[52]; |
1340 | 3.98M | bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit); |
1341 | 3.98M | bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit); |
1342 | 3.98M | bf1[55] = bf0[55]; |
1343 | 3.98M | bf1[56] = bf0[56]; |
1344 | 3.98M | bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit); |
1345 | 3.98M | bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit); |
1346 | 3.98M | bf1[59] = bf0[59]; |
1347 | 3.98M | bf1[60] = bf0[60]; |
1348 | 3.98M | bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit); |
1349 | 3.98M | bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit); |
1350 | 3.98M | bf1[63] = bf0[63]; |
1351 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1352 | | |
1353 | | // stage 5 |
1354 | 3.98M | stage++; |
1355 | 3.98M | bf0 = step; |
1356 | 3.98M | bf1 = output; |
1357 | 3.98M | bf1[0] = bf0[0]; |
1358 | 3.98M | bf1[1] = bf0[1]; |
1359 | 3.98M | bf1[2] = bf0[2]; |
1360 | 3.98M | bf1[3] = bf0[3]; |
1361 | 3.98M | bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); |
1362 | 3.98M | bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); |
1363 | 3.98M | bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); |
1364 | 3.98M | bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); |
1365 | 3.98M | bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); |
1366 | 3.98M | bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); |
1367 | 3.98M | bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); |
1368 | 3.98M | bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]); |
1369 | 3.98M | bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); |
1370 | 3.98M | bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); |
1371 | 3.98M | bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); |
1372 | 3.98M | bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); |
1373 | 3.98M | bf1[16] = bf0[16]; |
1374 | 3.98M | bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); |
1375 | 3.98M | bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); |
1376 | 3.98M | bf1[19] = bf0[19]; |
1377 | 3.98M | bf1[20] = bf0[20]; |
1378 | 3.98M | bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); |
1379 | 3.98M | bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); |
1380 | 3.98M | bf1[23] = bf0[23]; |
1381 | 3.98M | bf1[24] = bf0[24]; |
1382 | 3.98M | bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit); |
1383 | 3.98M | bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit); |
1384 | 3.98M | bf1[27] = bf0[27]; |
1385 | 3.98M | bf1[28] = bf0[28]; |
1386 | 3.98M | bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit); |
1387 | 3.98M | bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit); |
1388 | 3.98M | bf1[31] = bf0[31]; |
1389 | 3.98M | bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]); |
1390 | 3.98M | bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]); |
1391 | 3.98M | bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]); |
1392 | 3.98M | bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]); |
1393 | 3.98M | bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]); |
1394 | 3.98M | bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]); |
1395 | 3.98M | bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]); |
1396 | 3.98M | bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]); |
1397 | 3.98M | bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]); |
1398 | 3.98M | bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]); |
1399 | 3.98M | bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]); |
1400 | 3.98M | bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]); |
1401 | 3.98M | bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]); |
1402 | 3.98M | bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]); |
1403 | 3.98M | bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]); |
1404 | 3.98M | bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]); |
1405 | 3.98M | bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]); |
1406 | 3.98M | bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]); |
1407 | 3.98M | bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]); |
1408 | 3.98M | bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]); |
1409 | 3.98M | bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]); |
1410 | 3.98M | bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]); |
1411 | 3.98M | bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]); |
1412 | 3.98M | bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]); |
1413 | 3.98M | bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]); |
1414 | 3.98M | bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]); |
1415 | 3.98M | bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]); |
1416 | 3.98M | bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]); |
1417 | 3.98M | bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]); |
1418 | 3.98M | bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]); |
1419 | 3.98M | bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]); |
1420 | 3.98M | bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]); |
1421 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1422 | | |
1423 | | // stage 6 |
1424 | 3.98M | stage++; |
1425 | 3.98M | bf0 = output; |
1426 | 3.98M | bf1 = step; |
1427 | 3.98M | bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); |
1428 | 3.98M | bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); |
1429 | 3.98M | bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); |
1430 | 3.98M | bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); |
1431 | 3.98M | bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); |
1432 | 3.98M | bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); |
1433 | 3.98M | bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); |
1434 | 3.98M | bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); |
1435 | 3.98M | bf1[8] = bf0[8]; |
1436 | 3.98M | bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); |
1437 | 3.98M | bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); |
1438 | 3.98M | bf1[11] = bf0[11]; |
1439 | 3.98M | bf1[12] = bf0[12]; |
1440 | 3.98M | bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); |
1441 | 3.98M | bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit); |
1442 | 3.98M | bf1[15] = bf0[15]; |
1443 | 3.98M | bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]); |
1444 | 3.98M | bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]); |
1445 | 3.98M | bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]); |
1446 | 3.98M | bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]); |
1447 | 3.98M | bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]); |
1448 | 3.98M | bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]); |
1449 | 3.98M | bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]); |
1450 | 3.98M | bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]); |
1451 | 3.98M | bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]); |
1452 | 3.98M | bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]); |
1453 | 3.98M | bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]); |
1454 | 3.98M | bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]); |
1455 | 3.98M | bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]); |
1456 | 3.98M | bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]); |
1457 | 3.98M | bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]); |
1458 | 3.98M | bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]); |
1459 | 3.98M | bf1[32] = bf0[32]; |
1460 | 3.98M | bf1[33] = bf0[33]; |
1461 | 3.98M | bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit); |
1462 | 3.98M | bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit); |
1463 | 3.98M | bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit); |
1464 | 3.98M | bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit); |
1465 | 3.98M | bf1[38] = bf0[38]; |
1466 | 3.98M | bf1[39] = bf0[39]; |
1467 | 3.98M | bf1[40] = bf0[40]; |
1468 | 3.98M | bf1[41] = bf0[41]; |
1469 | 3.98M | bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit); |
1470 | 3.98M | bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit); |
1471 | 3.98M | bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit); |
1472 | 3.98M | bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit); |
1473 | 3.98M | bf1[46] = bf0[46]; |
1474 | 3.98M | bf1[47] = bf0[47]; |
1475 | 3.98M | bf1[48] = bf0[48]; |
1476 | 3.98M | bf1[49] = bf0[49]; |
1477 | 3.98M | bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit); |
1478 | 3.98M | bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit); |
1479 | 3.98M | bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit); |
1480 | 3.98M | bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit); |
1481 | 3.98M | bf1[54] = bf0[54]; |
1482 | 3.98M | bf1[55] = bf0[55]; |
1483 | 3.98M | bf1[56] = bf0[56]; |
1484 | 3.98M | bf1[57] = bf0[57]; |
1485 | 3.98M | bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit); |
1486 | 3.98M | bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit); |
1487 | 3.98M | bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit); |
1488 | 3.98M | bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit); |
1489 | 3.98M | bf1[62] = bf0[62]; |
1490 | 3.98M | bf1[63] = bf0[63]; |
1491 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1492 | | |
1493 | | // stage 7 |
1494 | 3.98M | stage++; |
1495 | 3.98M | bf0 = step; |
1496 | 3.98M | bf1 = output; |
1497 | 3.98M | bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); |
1498 | 3.98M | bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); |
1499 | 3.98M | bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); |
1500 | 3.98M | bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); |
1501 | 3.98M | bf1[4] = bf0[4]; |
1502 | 3.98M | bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
1503 | 3.98M | bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); |
1504 | 3.98M | bf1[7] = bf0[7]; |
1505 | 3.98M | bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); |
1506 | 3.98M | bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); |
1507 | 3.98M | bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); |
1508 | 3.98M | bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); |
1509 | 3.98M | bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); |
1510 | 3.98M | bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); |
1511 | 3.98M | bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); |
1512 | 3.98M | bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]); |
1513 | 3.98M | bf1[16] = bf0[16]; |
1514 | 3.98M | bf1[17] = bf0[17]; |
1515 | 3.98M | bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); |
1516 | 3.98M | bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); |
1517 | 3.98M | bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); |
1518 | 3.98M | bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); |
1519 | 3.98M | bf1[22] = bf0[22]; |
1520 | 3.98M | bf1[23] = bf0[23]; |
1521 | 3.98M | bf1[24] = bf0[24]; |
1522 | 3.98M | bf1[25] = bf0[25]; |
1523 | 3.98M | bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit); |
1524 | 3.98M | bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit); |
1525 | 3.98M | bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit); |
1526 | 3.98M | bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit); |
1527 | 3.98M | bf1[30] = bf0[30]; |
1528 | 3.98M | bf1[31] = bf0[31]; |
1529 | 3.98M | bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]); |
1530 | 3.98M | bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]); |
1531 | 3.98M | bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]); |
1532 | 3.98M | bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]); |
1533 | 3.98M | bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]); |
1534 | 3.98M | bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]); |
1535 | 3.98M | bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]); |
1536 | 3.98M | bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]); |
1537 | 3.98M | bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]); |
1538 | 3.98M | bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]); |
1539 | 3.98M | bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]); |
1540 | 3.98M | bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]); |
1541 | 3.98M | bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]); |
1542 | 3.98M | bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]); |
1543 | 3.98M | bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]); |
1544 | 3.98M | bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]); |
1545 | 3.98M | bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]); |
1546 | 3.98M | bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]); |
1547 | 3.98M | bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]); |
1548 | 3.98M | bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]); |
1549 | 3.98M | bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]); |
1550 | 3.98M | bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]); |
1551 | 3.98M | bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]); |
1552 | 3.98M | bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]); |
1553 | 3.98M | bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]); |
1554 | 3.98M | bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]); |
1555 | 3.98M | bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]); |
1556 | 3.98M | bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]); |
1557 | 3.98M | bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]); |
1558 | 3.98M | bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]); |
1559 | 3.98M | bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]); |
1560 | 3.98M | bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]); |
1561 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1562 | | |
1563 | | // stage 8 |
1564 | 3.98M | stage++; |
1565 | 3.98M | bf0 = output; |
1566 | 3.98M | bf1 = step; |
1567 | 3.98M | bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); |
1568 | 3.98M | bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); |
1569 | 3.98M | bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); |
1570 | 3.98M | bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); |
1571 | 3.98M | bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); |
1572 | 3.98M | bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); |
1573 | 3.98M | bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); |
1574 | 3.98M | bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); |
1575 | 3.98M | bf1[8] = bf0[8]; |
1576 | 3.98M | bf1[9] = bf0[9]; |
1577 | 3.98M | bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
1578 | 3.98M | bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
1579 | 3.98M | bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); |
1580 | 3.98M | bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); |
1581 | 3.98M | bf1[14] = bf0[14]; |
1582 | 3.98M | bf1[15] = bf0[15]; |
1583 | 3.98M | bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]); |
1584 | 3.98M | bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]); |
1585 | 3.98M | bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]); |
1586 | 3.98M | bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]); |
1587 | 3.98M | bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]); |
1588 | 3.98M | bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]); |
1589 | 3.98M | bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]); |
1590 | 3.98M | bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]); |
1591 | 3.98M | bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]); |
1592 | 3.98M | bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]); |
1593 | 3.98M | bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]); |
1594 | 3.98M | bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]); |
1595 | 3.98M | bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]); |
1596 | 3.98M | bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]); |
1597 | 3.98M | bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]); |
1598 | 3.98M | bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]); |
1599 | 3.98M | bf1[32] = bf0[32]; |
1600 | 3.98M | bf1[33] = bf0[33]; |
1601 | 3.98M | bf1[34] = bf0[34]; |
1602 | 3.98M | bf1[35] = bf0[35]; |
1603 | 3.98M | bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit); |
1604 | 3.98M | bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit); |
1605 | 3.98M | bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit); |
1606 | 3.98M | bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit); |
1607 | 3.98M | bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit); |
1608 | 3.98M | bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit); |
1609 | 3.98M | bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit); |
1610 | 3.98M | bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit); |
1611 | 3.98M | bf1[44] = bf0[44]; |
1612 | 3.98M | bf1[45] = bf0[45]; |
1613 | 3.98M | bf1[46] = bf0[46]; |
1614 | 3.98M | bf1[47] = bf0[47]; |
1615 | 3.98M | bf1[48] = bf0[48]; |
1616 | 3.98M | bf1[49] = bf0[49]; |
1617 | 3.98M | bf1[50] = bf0[50]; |
1618 | 3.98M | bf1[51] = bf0[51]; |
1619 | 3.98M | bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit); |
1620 | 3.98M | bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit); |
1621 | 3.98M | bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit); |
1622 | 3.98M | bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit); |
1623 | 3.98M | bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit); |
1624 | 3.98M | bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit); |
1625 | 3.98M | bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit); |
1626 | 3.98M | bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit); |
1627 | 3.98M | bf1[60] = bf0[60]; |
1628 | 3.98M | bf1[61] = bf0[61]; |
1629 | 3.98M | bf1[62] = bf0[62]; |
1630 | 3.98M | bf1[63] = bf0[63]; |
1631 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1632 | | |
1633 | | // stage 9 |
1634 | 3.98M | stage++; |
1635 | 3.98M | bf0 = step; |
1636 | 3.98M | bf1 = output; |
1637 | 3.98M | bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); |
1638 | 3.98M | bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); |
1639 | 3.98M | bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); |
1640 | 3.98M | bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); |
1641 | 3.98M | bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); |
1642 | 3.98M | bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); |
1643 | 3.98M | bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); |
1644 | 3.98M | bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); |
1645 | 3.98M | bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); |
1646 | 3.98M | bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); |
1647 | 3.98M | bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); |
1648 | 3.98M | bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); |
1649 | 3.98M | bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); |
1650 | 3.98M | bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); |
1651 | 3.98M | bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); |
1652 | 3.98M | bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]); |
1653 | 3.98M | bf1[16] = bf0[16]; |
1654 | 3.98M | bf1[17] = bf0[17]; |
1655 | 3.98M | bf1[18] = bf0[18]; |
1656 | 3.98M | bf1[19] = bf0[19]; |
1657 | 3.98M | bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); |
1658 | 3.98M | bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); |
1659 | 3.98M | bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); |
1660 | 3.98M | bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); |
1661 | 3.98M | bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); |
1662 | 3.98M | bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); |
1663 | 3.98M | bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); |
1664 | 3.98M | bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); |
1665 | 3.98M | bf1[28] = bf0[28]; |
1666 | 3.98M | bf1[29] = bf0[29]; |
1667 | 3.98M | bf1[30] = bf0[30]; |
1668 | 3.98M | bf1[31] = bf0[31]; |
1669 | 3.98M | bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]); |
1670 | 3.98M | bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]); |
1671 | 3.98M | bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]); |
1672 | 3.98M | bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]); |
1673 | 3.98M | bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]); |
1674 | 3.98M | bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]); |
1675 | 3.98M | bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]); |
1676 | 3.98M | bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]); |
1677 | 3.98M | bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]); |
1678 | 3.98M | bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]); |
1679 | 3.98M | bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]); |
1680 | 3.98M | bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]); |
1681 | 3.98M | bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]); |
1682 | 3.98M | bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]); |
1683 | 3.98M | bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]); |
1684 | 3.98M | bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]); |
1685 | 3.98M | bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]); |
1686 | 3.98M | bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]); |
1687 | 3.98M | bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]); |
1688 | 3.98M | bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]); |
1689 | 3.98M | bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]); |
1690 | 3.98M | bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]); |
1691 | 3.98M | bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]); |
1692 | 3.98M | bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]); |
1693 | 3.98M | bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]); |
1694 | 3.98M | bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]); |
1695 | 3.98M | bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]); |
1696 | 3.98M | bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]); |
1697 | 3.98M | bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]); |
1698 | 3.98M | bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]); |
1699 | 3.98M | bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]); |
1700 | 3.98M | bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]); |
1701 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1702 | | |
1703 | | // stage 10 |
1704 | 3.98M | stage++; |
1705 | 3.98M | bf0 = output; |
1706 | 3.98M | bf1 = step; |
1707 | 3.98M | bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]); |
1708 | 3.98M | bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]); |
1709 | 3.98M | bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]); |
1710 | 3.98M | bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]); |
1711 | 3.98M | bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]); |
1712 | 3.98M | bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]); |
1713 | 3.98M | bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]); |
1714 | 3.98M | bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]); |
1715 | 3.98M | bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]); |
1716 | 3.98M | bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]); |
1717 | 3.98M | bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]); |
1718 | 3.98M | bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]); |
1719 | 3.98M | bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]); |
1720 | 3.98M | bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]); |
1721 | 3.98M | bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]); |
1722 | 3.98M | bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]); |
1723 | 3.98M | bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]); |
1724 | 3.98M | bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]); |
1725 | 3.98M | bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]); |
1726 | 3.98M | bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]); |
1727 | 3.98M | bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]); |
1728 | 3.98M | bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]); |
1729 | 3.98M | bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]); |
1730 | 3.98M | bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]); |
1731 | 3.98M | bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]); |
1732 | 3.98M | bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]); |
1733 | 3.98M | bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]); |
1734 | 3.98M | bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]); |
1735 | 3.98M | bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]); |
1736 | 3.98M | bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]); |
1737 | 3.98M | bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]); |
1738 | 3.98M | bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]); |
1739 | 3.98M | bf1[32] = bf0[32]; |
1740 | 3.98M | bf1[33] = bf0[33]; |
1741 | 3.98M | bf1[34] = bf0[34]; |
1742 | 3.98M | bf1[35] = bf0[35]; |
1743 | 3.98M | bf1[36] = bf0[36]; |
1744 | 3.98M | bf1[37] = bf0[37]; |
1745 | 3.98M | bf1[38] = bf0[38]; |
1746 | 3.98M | bf1[39] = bf0[39]; |
1747 | 3.98M | bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit); |
1748 | 3.98M | bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit); |
1749 | 3.98M | bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit); |
1750 | 3.98M | bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit); |
1751 | 3.98M | bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit); |
1752 | 3.98M | bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit); |
1753 | 3.98M | bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit); |
1754 | 3.98M | bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit); |
1755 | 3.98M | bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit); |
1756 | 3.98M | bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit); |
1757 | 3.98M | bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit); |
1758 | 3.98M | bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit); |
1759 | 3.98M | bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit); |
1760 | 3.98M | bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit); |
1761 | 3.98M | bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit); |
1762 | 3.98M | bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit); |
1763 | 3.98M | bf1[56] = bf0[56]; |
1764 | 3.98M | bf1[57] = bf0[57]; |
1765 | 3.98M | bf1[58] = bf0[58]; |
1766 | 3.98M | bf1[59] = bf0[59]; |
1767 | 3.98M | bf1[60] = bf0[60]; |
1768 | 3.98M | bf1[61] = bf0[61]; |
1769 | 3.98M | bf1[62] = bf0[62]; |
1770 | 3.98M | bf1[63] = bf0[63]; |
1771 | 3.98M | av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); |
1772 | | |
1773 | | // stage 11 |
1774 | 3.98M | stage++; |
1775 | 3.98M | bf0 = step; |
1776 | 3.98M | bf1 = output; |
1777 | 3.98M | bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]); |
1778 | 3.98M | bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]); |
1779 | 3.98M | bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]); |
1780 | 3.98M | bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]); |
1781 | 3.98M | bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]); |
1782 | 3.98M | bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]); |
1783 | 3.98M | bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]); |
1784 | 3.98M | bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]); |
1785 | 3.98M | bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]); |
1786 | 3.98M | bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]); |
1787 | 3.98M | bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]); |
1788 | 3.98M | bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]); |
1789 | 3.98M | bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]); |
1790 | 3.98M | bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]); |
1791 | 3.98M | bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]); |
1792 | 3.98M | bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]); |
1793 | 3.98M | bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]); |
1794 | 3.98M | bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]); |
1795 | 3.98M | bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]); |
1796 | 3.98M | bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]); |
1797 | 3.98M | bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]); |
1798 | 3.98M | bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]); |
1799 | 3.98M | bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]); |
1800 | 3.98M | bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]); |
1801 | 3.98M | bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]); |
1802 | 3.98M | bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]); |
1803 | 3.98M | bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]); |
1804 | 3.98M | bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]); |
1805 | 3.98M | bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]); |
1806 | 3.98M | bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]); |
1807 | 3.98M | bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]); |
1808 | 3.98M | bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]); |
1809 | 3.98M | bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]); |
1810 | 3.98M | bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]); |
1811 | 3.98M | bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]); |
1812 | 3.98M | bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]); |
1813 | 3.98M | bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]); |
1814 | 3.98M | bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]); |
1815 | 3.98M | bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]); |
1816 | 3.98M | bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]); |
1817 | 3.98M | bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]); |
1818 | 3.98M | bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]); |
1819 | 3.98M | bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]); |
1820 | 3.98M | bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]); |
1821 | 3.98M | bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]); |
1822 | 3.98M | bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]); |
1823 | 3.98M | bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]); |
1824 | 3.98M | bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]); |
1825 | 3.98M | bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]); |
1826 | 3.98M | bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]); |
1827 | 3.98M | bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]); |
1828 | 3.98M | bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]); |
1829 | 3.98M | bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]); |
1830 | 3.98M | bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]); |
1831 | 3.98M | bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]); |
1832 | 3.98M | bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]); |
1833 | 3.98M | bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]); |
1834 | 3.98M | bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]); |
1835 | 3.98M | bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]); |
1836 | 3.98M | bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]); |
1837 | 3.98M | bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]); |
1838 | 3.98M | bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]); |
1839 | 3.98M | bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]); |
1840 | 3.98M | bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]); |
1841 | 3.98M | } |