/src/ffmpeg/libavcodec/dcadct.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2016 foo86 |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include <stdlib.h> |
22 | | |
23 | | #include "dcadct.h" |
24 | | #include "dcamath.h" |
25 | | |
26 | | static void sum_a(const int *input, int *output, int len) |
27 | 915k | { |
28 | 915k | int i; |
29 | | |
30 | 11.8M | for (i = 0; i < len; i++) |
31 | 10.9M | output[i] = input[2 * i] + input[2 * i + 1]; |
32 | 915k | } |
33 | | |
34 | | static void sum_b(const int *input, int *output, int len) |
35 | 915k | { |
36 | 915k | int i; |
37 | | |
38 | 915k | output[0] = input[0]; |
39 | 10.9M | for (i = 1; i < len; i++) |
40 | 10.0M | output[i] = input[2 * i] + input[2 * i - 1]; |
41 | 915k | } |
42 | | |
43 | | static void sum_c(const int *input, int *output, int len) |
44 | 457k | { |
45 | 457k | int i; |
46 | | |
47 | 4.11M | for (i = 0; i < len; i++) |
48 | 3.66M | output[i] = input[2 * i]; |
49 | 457k | } |
50 | | |
51 | | static void sum_d(const int *input, int *output, int len) |
52 | 457k | { |
53 | 457k | int i; |
54 | | |
55 | 457k | output[0] = input[1]; |
56 | 3.66M | for (i = 1; i < len; i++) |
57 | 3.20M | output[i] = input[2 * i - 1] + input[2 * i + 1]; |
58 | 457k | } |
59 | | |
60 | | static void dct_a(const int *input, int *output) |
61 | 457k | { |
62 | 457k | static const int cos_mod[8][8] = { |
63 | 457k | { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 }, |
64 | 457k | { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 }, |
65 | 457k | { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 }, |
66 | 457k | { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 }, |
67 | 457k | { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 }, |
68 | 457k | { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 }, |
69 | 457k | { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 }, |
70 | 457k | { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 } |
71 | 457k | }; |
72 | | |
73 | 457k | int i, j; |
74 | | |
75 | 4.11M | for (i = 0; i < 8; i++) { |
76 | 3.66M | int64_t res = 0; |
77 | 32.9M | for (j = 0; j < 8; j++) |
78 | 29.2M | res += (int64_t)cos_mod[i][j] * input[j]; |
79 | 3.66M | output[i] = norm23(res); |
80 | 3.66M | } |
81 | 457k | } |
82 | | |
83 | | static void dct_b(const int *input, int *output) |
84 | 1.37M | { |
85 | 1.37M | static const int cos_mod[8][7] = { |
86 | 1.37M | { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 }, |
87 | 1.37M | { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 }, |
88 | 1.37M | { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 }, |
89 | 1.37M | { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 }, |
90 | 1.37M | { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 }, |
91 | 1.37M | { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 }, |
92 | 1.37M | { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 }, |
93 | 1.37M | { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 } |
94 | 1.37M | }; |
95 | | |
96 | 1.37M | int i, j; |
97 | | |
98 | 12.3M | for (i = 0; i < 8; i++) { |
99 | 10.9M | int64_t res = input[0] * (INT64_C(1) << 23); |
100 | 87.8M | for (j = 0; j < 7; j++) |
101 | 76.8M | res += (int64_t)cos_mod[i][j] * input[1 + j]; |
102 | 10.9M | output[i] = norm23(res); |
103 | 10.9M | } |
104 | 1.37M | } |
105 | | |
106 | | static void mod_a(const int *input, int *output) |
107 | 457k | { |
108 | 457k | static const int cos_mod[16] = { |
109 | 457k | 4199362, 4240198, 4323885, 4454708, |
110 | 457k | 4639772, 4890013, 5221943, 5660703, |
111 | 457k | -6245623, -7040975, -8158494, -9809974, |
112 | 457k | -12450076, -17261920, -28585092, -85479984 |
113 | 457k | }; |
114 | | |
115 | 457k | int i, k; |
116 | | |
117 | 4.11M | for (i = 0; i < 8; i++) |
118 | 3.66M | output[i] = mul23(cos_mod[i], input[i] + input[8 + i]); |
119 | | |
120 | 4.11M | for (i = 8, k = 7; i < 16; i++, k--) |
121 | 3.66M | output[i] = mul23(cos_mod[i], input[k] - input[8 + k]); |
122 | 457k | } |
123 | | |
124 | | static void mod_b(int *input, int *output) |
125 | 457k | { |
126 | 457k | static const int cos_mod[8] = { |
127 | 457k | 4214598, 4383036, 4755871, 5425934, |
128 | 457k | 6611520, 8897610, 14448934, 42791536 |
129 | 457k | }; |
130 | | |
131 | 457k | int i, k; |
132 | | |
133 | 4.11M | for (i = 0; i < 8; i++) |
134 | 3.66M | input[8 + i] = mul23(cos_mod[i], input[8 + i]); |
135 | | |
136 | 4.11M | for (i = 0; i < 8; i++) |
137 | 3.66M | output[i] = input[i] + input[8 + i]; |
138 | | |
139 | 4.11M | for (i = 8, k = 7; i < 16; i++, k--) |
140 | 3.66M | output[i] = input[k] - input[8 + k]; |
141 | 457k | } |
142 | | |
143 | | static void mod_c(const int *input, int *output) |
144 | 457k | { |
145 | 457k | static const int cos_mod[32] = { |
146 | 457k | 1048892, 1051425, 1056522, 1064244, |
147 | 457k | 1074689, 1087987, 1104313, 1123884, |
148 | 457k | 1146975, 1173922, 1205139, 1241133, |
149 | 457k | 1282529, 1330095, 1384791, 1447815, |
150 | 457k | -1520688, -1605358, -1704360, -1821051, |
151 | 457k | -1959964, -2127368, -2332183, -2587535, |
152 | 457k | -2913561, -3342802, -3931480, -4785806, |
153 | 457k | -6133390, -8566050, -14253820, -42727120 |
154 | 457k | }; |
155 | | |
156 | 457k | int i, k; |
157 | | |
158 | 7.77M | for (i = 0; i < 16; i++) |
159 | 7.32M | output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); |
160 | | |
161 | 7.77M | for (i = 16, k = 15; i < 32; i++, k--) |
162 | 7.32M | output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); |
163 | 457k | } |
164 | | |
165 | | static void clp_v(int *input, int len) |
166 | 1.83M | { |
167 | 1.83M | int i; |
168 | | |
169 | 60.3M | for (i = 0; i < len; i++) |
170 | 58.5M | input[i] = clip23(input[i]); |
171 | 1.83M | } |
172 | | |
173 | | static void imdct_half_32(int32_t *output, const int32_t *input) |
174 | 457k | { |
175 | 457k | int buf_a[32], buf_b[32]; |
176 | 457k | int i, k, mag, shift, round; |
177 | | |
178 | 457k | mag = 0; |
179 | 15.0M | for (i = 0; i < 32; i++) |
180 | 14.6M | mag += abs(input[i]); |
181 | | |
182 | 457k | shift = mag > 0x400000 ? 2 : 0; |
183 | 457k | round = shift > 0 ? 1 << (shift - 1) : 0; |
184 | | |
185 | 15.0M | for (i = 0; i < 32; i++) |
186 | 14.6M | buf_a[i] = (input[i] + round) >> shift; |
187 | | |
188 | 457k | sum_a(buf_a, buf_b + 0, 16); |
189 | 457k | sum_b(buf_a, buf_b + 16, 16); |
190 | 457k | clp_v(buf_b, 32); |
191 | | |
192 | 457k | sum_a(buf_b + 0, buf_a + 0, 8); |
193 | 457k | sum_b(buf_b + 0, buf_a + 8, 8); |
194 | 457k | sum_c(buf_b + 16, buf_a + 16, 8); |
195 | 457k | sum_d(buf_b + 16, buf_a + 24, 8); |
196 | 457k | clp_v(buf_a, 32); |
197 | | |
198 | 457k | dct_a(buf_a + 0, buf_b + 0); |
199 | 457k | dct_b(buf_a + 8, buf_b + 8); |
200 | 457k | dct_b(buf_a + 16, buf_b + 16); |
201 | 457k | dct_b(buf_a + 24, buf_b + 24); |
202 | 457k | clp_v(buf_b, 32); |
203 | | |
204 | 457k | mod_a(buf_b + 0, buf_a + 0); |
205 | 457k | mod_b(buf_b + 16, buf_a + 16); |
206 | 457k | clp_v(buf_a, 32); |
207 | | |
208 | 457k | mod_c(buf_a, buf_b); |
209 | | |
210 | 15.0M | for (i = 0; i < 32; i++) |
211 | 14.6M | buf_b[i] = clip23(buf_b[i] * (1 << shift)); |
212 | | |
213 | 7.77M | for (i = 0, k = 31; i < 16; i++, k--) { |
214 | 7.32M | output[ i] = clip23(buf_b[i] - buf_b[k]); |
215 | 7.32M | output[16 + i] = clip23(buf_b[i] + buf_b[k]); |
216 | 7.32M | } |
217 | 457k | } |
218 | | |
219 | | static void mod64_a(const int *input, int *output) |
220 | 0 | { |
221 | 0 | static const int cos_mod[32] = { |
222 | 0 | 4195568, 4205700, 4226086, 4256977, |
223 | 0 | 4298755, 4351949, 4417251, 4495537, |
224 | 0 | 4587901, 4695690, 4820557, 4964534, |
225 | 0 | 5130115, 5320382, 5539164, 5791261, |
226 | 0 | -6082752, -6421430, -6817439, -7284203, |
227 | 0 | -7839855, -8509474, -9328732, -10350140, |
228 | 0 | -11654242, -13371208, -15725922, -19143224, |
229 | 0 | -24533560, -34264200, -57015280, -170908480 |
230 | 0 | }; |
231 | |
|
232 | 0 | int i, k; |
233 | |
|
234 | 0 | for (i = 0; i < 16; i++) |
235 | 0 | output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); |
236 | |
|
237 | 0 | for (i = 16, k = 15; i < 32; i++, k--) |
238 | 0 | output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); |
239 | 0 | } |
240 | | |
241 | | static void mod64_b(int *input, int *output) |
242 | 0 | { |
243 | 0 | static const int cos_mod[16] = { |
244 | 0 | 4199362, 4240198, 4323885, 4454708, |
245 | 0 | 4639772, 4890013, 5221943, 5660703, |
246 | 0 | 6245623, 7040975, 8158494, 9809974, |
247 | 0 | 12450076, 17261920, 28585092, 85479984 |
248 | 0 | }; |
249 | |
|
250 | 0 | int i, k; |
251 | |
|
252 | 0 | for (i = 0; i < 16; i++) |
253 | 0 | input[16 + i] = mul23(cos_mod[i], input[16 + i]); |
254 | |
|
255 | 0 | for (i = 0; i < 16; i++) |
256 | 0 | output[i] = input[i] + input[16 + i]; |
257 | |
|
258 | 0 | for (i = 16, k = 15; i < 32; i++, k--) |
259 | 0 | output[i] = input[k] - input[16 + k]; |
260 | 0 | } |
261 | | |
262 | | static void mod64_c(const int *input, int *output) |
263 | 0 | { |
264 | 0 | static const int cos_mod[64] = { |
265 | 0 | 741511, 741958, 742853, 744199, |
266 | 0 | 746001, 748262, 750992, 754197, |
267 | 0 | 757888, 762077, 766777, 772003, |
268 | 0 | 777772, 784105, 791021, 798546, |
269 | 0 | 806707, 815532, 825054, 835311, |
270 | 0 | 846342, 858193, 870912, 884554, |
271 | 0 | 899181, 914860, 931667, 949686, |
272 | 0 | 969011, 989747, 1012012, 1035941, |
273 | 0 | -1061684, -1089412, -1119320, -1151629, |
274 | 0 | -1186595, -1224511, -1265719, -1310613, |
275 | 0 | -1359657, -1413400, -1472490, -1537703, |
276 | 0 | -1609974, -1690442, -1780506, -1881904, |
277 | 0 | -1996824, -2128058, -2279225, -2455101, |
278 | 0 | -2662128, -2909200, -3208956, -3579983, |
279 | 0 | -4050785, -4667404, -5509372, -6726913, |
280 | 0 | -8641940, -12091426, -20144284, -60420720 |
281 | 0 | }; |
282 | |
|
283 | 0 | int i, k; |
284 | |
|
285 | 0 | for (i = 0; i < 32; i++) |
286 | 0 | output[i] = mul23(cos_mod[i], input[i] + input[32 + i]); |
287 | |
|
288 | 0 | for (i = 32, k = 31; i < 64; i++, k--) |
289 | 0 | output[i] = mul23(cos_mod[i], input[k] - input[32 + k]); |
290 | 0 | } |
291 | | |
292 | | static void imdct_half_64(int32_t *output, const int32_t *input) |
293 | 0 | { |
294 | 0 | int buf_a[64], buf_b[64]; |
295 | 0 | int i, k, mag, shift, round; |
296 | |
|
297 | 0 | mag = 0; |
298 | 0 | for (i = 0; i < 64; i++) |
299 | 0 | mag += abs(input[i]); |
300 | |
|
301 | 0 | shift = mag > 0x400000 ? 2 : 0; |
302 | 0 | round = shift > 0 ? 1 << (shift - 1) : 0; |
303 | |
|
304 | 0 | for (i = 0; i < 64; i++) |
305 | 0 | buf_a[i] = (input[i] + round) >> shift; |
306 | |
|
307 | 0 | sum_a(buf_a, buf_b + 0, 32); |
308 | 0 | sum_b(buf_a, buf_b + 32, 32); |
309 | 0 | clp_v(buf_b, 64); |
310 | |
|
311 | 0 | sum_a(buf_b + 0, buf_a + 0, 16); |
312 | 0 | sum_b(buf_b + 0, buf_a + 16, 16); |
313 | 0 | sum_c(buf_b + 32, buf_a + 32, 16); |
314 | 0 | sum_d(buf_b + 32, buf_a + 48, 16); |
315 | 0 | clp_v(buf_a, 64); |
316 | |
|
317 | 0 | sum_a(buf_a + 0, buf_b + 0, 8); |
318 | 0 | sum_b(buf_a + 0, buf_b + 8, 8); |
319 | 0 | sum_c(buf_a + 16, buf_b + 16, 8); |
320 | 0 | sum_d(buf_a + 16, buf_b + 24, 8); |
321 | 0 | sum_c(buf_a + 32, buf_b + 32, 8); |
322 | 0 | sum_d(buf_a + 32, buf_b + 40, 8); |
323 | 0 | sum_c(buf_a + 48, buf_b + 48, 8); |
324 | 0 | sum_d(buf_a + 48, buf_b + 56, 8); |
325 | 0 | clp_v(buf_b, 64); |
326 | |
|
327 | 0 | dct_a(buf_b + 0, buf_a + 0); |
328 | 0 | dct_b(buf_b + 8, buf_a + 8); |
329 | 0 | dct_b(buf_b + 16, buf_a + 16); |
330 | 0 | dct_b(buf_b + 24, buf_a + 24); |
331 | 0 | dct_b(buf_b + 32, buf_a + 32); |
332 | 0 | dct_b(buf_b + 40, buf_a + 40); |
333 | 0 | dct_b(buf_b + 48, buf_a + 48); |
334 | 0 | dct_b(buf_b + 56, buf_a + 56); |
335 | 0 | clp_v(buf_a, 64); |
336 | |
|
337 | 0 | mod_a(buf_a + 0, buf_b + 0); |
338 | 0 | mod_b(buf_a + 16, buf_b + 16); |
339 | 0 | mod_b(buf_a + 32, buf_b + 32); |
340 | 0 | mod_b(buf_a + 48, buf_b + 48); |
341 | 0 | clp_v(buf_b, 64); |
342 | |
|
343 | 0 | mod64_a(buf_b + 0, buf_a + 0); |
344 | 0 | mod64_b(buf_b + 32, buf_a + 32); |
345 | 0 | clp_v(buf_a, 64); |
346 | |
|
347 | 0 | mod64_c(buf_a, buf_b); |
348 | |
|
349 | 0 | for (i = 0; i < 64; i++) |
350 | 0 | buf_b[i] = clip23(buf_b[i] * (1 << shift)); |
351 | |
|
352 | 0 | for (i = 0, k = 63; i < 32; i++, k--) { |
353 | 0 | output[ i] = clip23(buf_b[i] - buf_b[k]); |
354 | 0 | output[32 + i] = clip23(buf_b[i] + buf_b[k]); |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | av_cold void ff_dcadct_init(DCADCTContext *c) |
359 | 10.3k | { |
360 | 10.3k | c->imdct_half[0] = imdct_half_32; |
361 | 10.3k | c->imdct_half[1] = imdct_half_64; |
362 | 10.3k | } |