/src/ffmpeg/libavcodec/vc2enc_dwt.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2007 Marco Gerards <marco@gnu.org> |
3 | | * Copyright (C) 2016 Open Broadcast Systems Ltd. |
4 | | * Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | #include "libavutil/attributes.h" |
24 | | #include "libavutil/mem.h" |
25 | | #include "vc2enc_dwt.h" |
26 | | |
27 | | /* Since the transforms spit out interleaved coefficients, this function |
28 | | * rearranges the coefficients into the more traditional subdivision, |
29 | | * making it easier to encode and perform another level. */ |
30 | | static av_always_inline void deinterleave(dwtcoef *linell, ptrdiff_t stride, |
31 | | int width, int height, dwtcoef *synthl) |
32 | 141k | { |
33 | 141k | int x, y; |
34 | 141k | ptrdiff_t synthw = width << 1; |
35 | 141k | dwtcoef *linehl = linell + width; |
36 | 141k | dwtcoef *linelh = linell + height*stride; |
37 | 141k | dwtcoef *linehh = linelh + width; |
38 | | |
39 | | /* Deinterleave the coefficients. */ |
40 | 1.48M | for (y = 0; y < height; y++) { |
41 | 33.3M | for (x = 0; x < width; x++) { |
42 | 32.0M | linell[x] = synthl[(x << 1)]; |
43 | 32.0M | linehl[x] = synthl[(x << 1) + 1]; |
44 | 32.0M | linelh[x] = synthl[(x << 1) + synthw]; |
45 | 32.0M | linehh[x] = synthl[(x << 1) + synthw + 1]; |
46 | 32.0M | } |
47 | 1.34M | synthl += synthw << 1; |
48 | 1.34M | linell += stride; |
49 | 1.34M | linelh += stride; |
50 | 1.34M | linehl += stride; |
51 | 1.34M | linehh += stride; |
52 | 1.34M | } |
53 | 141k | } |
54 | | |
55 | | static void vc2_subband_dwt_97(VC2TransformContext *t, dwtcoef *data, |
56 | | ptrdiff_t stride, int width, int height) |
57 | 141k | { |
58 | 141k | int x, y; |
59 | 141k | dwtcoef *datal = data, *synth = t->buffer, *synthl = synth; |
60 | 141k | const ptrdiff_t synth_width = width << 1; |
61 | 141k | const ptrdiff_t synth_height = height << 1; |
62 | | |
63 | | /* |
64 | | * Shift in one bit that is used for additional precision and copy |
65 | | * the data to the buffer. |
66 | | */ |
67 | 2.83M | for (y = 0; y < synth_height; y++) { |
68 | 130M | for (x = 0; x < synth_width; x++) |
69 | 128M | synthl[x] = datal[x] * 2; |
70 | 2.69M | synthl += synth_width; |
71 | 2.69M | datal += stride; |
72 | 2.69M | } |
73 | | |
74 | | /* Horizontal synthesis. */ |
75 | 141k | synthl = synth; |
76 | 2.83M | for (y = 0; y < synth_height; y++) { |
77 | | /* Lifting stage 2. */ |
78 | 2.69M | synthl[1] -= (8*synthl[0] + 9*synthl[2] - synthl[4] + 8) >> 4; |
79 | 58.9M | for (x = 1; x < width - 2; x++) |
80 | 56.3M | synthl[2*x + 1] -= (9*synthl[2*x] + 9*synthl[2*x + 2] - synthl[2*x + 4] - |
81 | 56.3M | synthl[2 * x - 2] + 8) >> 4; |
82 | 2.69M | synthl[synth_width - 1] -= (17*synthl[synth_width - 2] - |
83 | 2.69M | synthl[synth_width - 4] + 8) >> 4; |
84 | 2.69M | synthl[synth_width - 3] -= (8*synthl[synth_width - 2] + |
85 | 2.69M | 9*synthl[synth_width - 4] - |
86 | 2.69M | synthl[synth_width - 6] + 8) >> 4; |
87 | | /* Lifting stage 1. */ |
88 | 2.69M | synthl[0] += (synthl[1] + synthl[1] + 2) >> 2; |
89 | 61.4M | for (x = 1; x < width - 1; x++) |
90 | 58.7M | synthl[2*x] += (synthl[2*x - 1] + synthl[2*x + 1] + 2) >> 2; |
91 | | |
92 | 2.69M | synthl[synth_width - 2] += (synthl[synth_width - 3] + |
93 | 2.69M | synthl[synth_width - 1] + 2) >> 2; |
94 | 2.69M | synthl += synth_width; |
95 | 2.69M | } |
96 | | |
97 | | /* Vertical synthesis: Lifting stage 2. */ |
98 | 141k | synthl = synth + synth_width; |
99 | 4.00M | for (x = 0; x < synth_width; x++) |
100 | 3.86M | synthl[x] -= (8*synthl[x - synth_width] + 9*synthl[x + synth_width] - |
101 | 3.86M | synthl[x + 3 * synth_width] + 8) >> 4; |
102 | | |
103 | 141k | synthl = synth + (synth_width << 1); |
104 | 1.14M | for (y = 1; y < height - 2; y++) { |
105 | 54.0M | for (x = 0; x < synth_width; x++) |
106 | 53.0M | synthl[x + synth_width] -= (9*synthl[x] + |
107 | 53.0M | 9*synthl[x + 2 * synth_width] - |
108 | 53.0M | synthl[x - 2 * synth_width] - |
109 | 53.0M | synthl[x + 4 * synth_width] + 8) >> 4; |
110 | 1.00M | synthl += synth_width << 1; |
111 | 1.00M | } |
112 | | |
113 | 141k | synthl = synth + (synth_height - 1) * synth_width; |
114 | 4.00M | for (x = 0; x < synth_width; x++) { |
115 | 3.86M | synthl[x] -= (17*synthl[x - synth_width] - |
116 | 3.86M | synthl[x - 3*synth_width] + 8) >> 4; |
117 | 3.86M | synthl[x - 2*synth_width] -= (9*synthl[x - 3*synth_width] + |
118 | 3.86M | 8*synthl[x - 1*synth_width] - synthl[x - 5*synth_width] + 8) >> 4; |
119 | 3.86M | } |
120 | | |
121 | | /* Vertical synthesis: Lifting stage 1. */ |
122 | 141k | synthl = synth; |
123 | 4.00M | for (x = 0; x < synth_width; x++) |
124 | 3.86M | synthl[x] += (synthl[x + synth_width] + synthl[x + synth_width] + 2) >> 2; |
125 | | |
126 | 141k | synthl = synth + (synth_width << 1); |
127 | 1.22M | for (y = 1; y < height - 1; y++) { |
128 | 57.5M | for (x = 0; x < synth_width; x++) |
129 | 56.4M | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
130 | 1.08M | synthl += synth_width << 1; |
131 | 1.08M | } |
132 | | |
133 | 141k | synthl = synth + (synth_height - 2) * synth_width; |
134 | 4.00M | for (x = 0; x < synth_width; x++) |
135 | 3.86M | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
136 | | |
137 | 141k | deinterleave(data, stride, width, height, synth); |
138 | 141k | } |
139 | | |
140 | | static void vc2_subband_dwt_53(VC2TransformContext *t, dwtcoef *data, |
141 | | ptrdiff_t stride, int width, int height) |
142 | 0 | { |
143 | 0 | int x, y; |
144 | 0 | dwtcoef *synth = t->buffer, *synthl = synth, *datal = data; |
145 | 0 | const ptrdiff_t synth_width = width << 1; |
146 | 0 | const ptrdiff_t synth_height = height << 1; |
147 | | |
148 | | /* |
149 | | * Shift in one bit that is used for additional precision and copy |
150 | | * the data to the buffer. |
151 | | */ |
152 | 0 | for (y = 0; y < synth_height; y++) { |
153 | 0 | for (x = 0; x < synth_width; x++) |
154 | 0 | synthl[x] = datal[x] * 2; |
155 | 0 | synthl += synth_width; |
156 | 0 | datal += stride; |
157 | 0 | } |
158 | | |
159 | | /* Horizontal synthesis. */ |
160 | 0 | synthl = synth; |
161 | 0 | for (y = 0; y < synth_height; y++) { |
162 | | /* Lifting stage 2. */ |
163 | 0 | for (x = 0; x < width - 1; x++) |
164 | 0 | synthl[2 * x + 1] -= (synthl[2 * x] + synthl[2 * x + 2] + 1) >> 1; |
165 | |
|
166 | 0 | synthl[synth_width - 1] -= (2*synthl[synth_width - 2] + 1) >> 1; |
167 | | |
168 | | /* Lifting stage 1. */ |
169 | 0 | synthl[0] += (2*synthl[1] + 2) >> 2; |
170 | 0 | for (x = 1; x < width - 1; x++) |
171 | 0 | synthl[2 * x] += (synthl[2 * x - 1] + synthl[2 * x + 1] + 2) >> 2; |
172 | |
|
173 | 0 | synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2; |
174 | |
|
175 | 0 | synthl += synth_width; |
176 | 0 | } |
177 | | |
178 | | /* Vertical synthesis: Lifting stage 2. */ |
179 | 0 | synthl = synth + synth_width; |
180 | 0 | for (x = 0; x < synth_width; x++) |
181 | 0 | synthl[x] -= (synthl[x - synth_width] + synthl[x + synth_width] + 1) >> 1; |
182 | |
|
183 | 0 | synthl = synth + (synth_width << 1); |
184 | 0 | for (y = 1; y < height - 1; y++) { |
185 | 0 | for (x = 0; x < synth_width; x++) |
186 | 0 | synthl[x + synth_width] -= (synthl[x] + synthl[x + synth_width * 2] + 1) >> 1; |
187 | 0 | synthl += (synth_width << 1); |
188 | 0 | } |
189 | |
|
190 | 0 | synthl = synth + (synth_height - 1) * synth_width; |
191 | 0 | for (x = 0; x < synth_width; x++) |
192 | 0 | synthl[x] -= (2*synthl[x - synth_width] + 1) >> 1; |
193 | | |
194 | | /* Vertical synthesis: Lifting stage 1. */ |
195 | 0 | synthl = synth; |
196 | 0 | for (x = 0; x < synth_width; x++) |
197 | 0 | synthl[x] += (2*synthl[synth_width + x] + 2) >> 2; |
198 | |
|
199 | 0 | synthl = synth + (synth_width << 1); |
200 | 0 | for (y = 1; y < height - 1; y++) { |
201 | 0 | for (x = 0; x < synth_width; x++) |
202 | 0 | synthl[x] += (synthl[x + synth_width] + synthl[x - synth_width] + 2) >> 2; |
203 | 0 | synthl += (synth_width << 1); |
204 | 0 | } |
205 | |
|
206 | 0 | synthl = synth + (synth_height - 2)*synth_width; |
207 | 0 | for (x = 0; x < synth_width; x++) |
208 | 0 | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
209 | | |
210 | |
|
211 | 0 | deinterleave(data, stride, width, height, synth); |
212 | 0 | } |
213 | | |
214 | | static av_always_inline void dwt_haar(VC2TransformContext *t, dwtcoef *data, |
215 | | ptrdiff_t stride, int width, int height, |
216 | | const int s) |
217 | 0 | { |
218 | 0 | int x, y; |
219 | 0 | dwtcoef *synth = t->buffer, *synthl = synth, *datal = data; |
220 | 0 | const ptrdiff_t synth_width = width << 1; |
221 | 0 | const ptrdiff_t synth_height = height << 1; |
222 | | |
223 | | /* Horizontal synthesis. */ |
224 | 0 | for (y = 0; y < synth_height; y++) { |
225 | 0 | for (x = 0; x < synth_width; x += 2) { |
226 | 0 | synthl[y*synth_width + x + 1] = (datal[y*stride + x + 1] - datal[y*stride + x]) * (1 << s); |
227 | 0 | synthl[y*synth_width + x] = datal[y*stride + x + 0] * (1 << s) + |
228 | 0 | ((synthl[y*synth_width + x + 1] + 1) >> 1); |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | /* Vertical synthesis. */ |
233 | 0 | for (x = 0; x < synth_width; x++) { |
234 | 0 | for (y = 0; y < synth_height; y += 2) { |
235 | 0 | synthl[(y + 1)*synth_width + x] = synthl[(y + 1)*synth_width + x] - |
236 | 0 | synthl[y*synth_width + x]; |
237 | 0 | synthl[y*synth_width + x] = synthl[y*synth_width + x] + |
238 | 0 | ((synthl[(y + 1)*synth_width + x] + 1) >> 1); |
239 | 0 | } |
240 | 0 | } |
241 | |
|
242 | 0 | deinterleave(data, stride, width, height, synth); |
243 | 0 | } |
244 | | |
245 | | static void vc2_subband_dwt_haar(VC2TransformContext *t, dwtcoef *data, |
246 | | ptrdiff_t stride, int width, int height) |
247 | 0 | { |
248 | 0 | dwt_haar(t, data, stride, width, height, 0); |
249 | 0 | } |
250 | | |
251 | | static void vc2_subband_dwt_haar_shift(VC2TransformContext *t, dwtcoef *data, |
252 | | ptrdiff_t stride, int width, int height) |
253 | 0 | { |
254 | 0 | dwt_haar(t, data, stride, width, height, 1); |
255 | 0 | } |
256 | | |
257 | | av_cold int ff_vc2enc_init_transforms(VC2TransformContext *s, int p_stride, |
258 | | int p_height, int slice_w, int slice_h) |
259 | 2.27k | { |
260 | 2.27k | s->vc2_subband_dwt[VC2_TRANSFORM_9_7] = vc2_subband_dwt_97; |
261 | 2.27k | s->vc2_subband_dwt[VC2_TRANSFORM_5_3] = vc2_subband_dwt_53; |
262 | 2.27k | s->vc2_subband_dwt[VC2_TRANSFORM_HAAR] = vc2_subband_dwt_haar; |
263 | 2.27k | s->vc2_subband_dwt[VC2_TRANSFORM_HAAR_S] = vc2_subband_dwt_haar_shift; |
264 | | |
265 | | /* Pad by the slice size, only matters for non-Haar wavelets */ |
266 | 2.27k | s->buffer = av_calloc((p_stride + slice_w)*(p_height + slice_h), sizeof(dwtcoef)); |
267 | 2.27k | if (!s->buffer) |
268 | 0 | return 1; |
269 | | |
270 | 2.27k | s->padding = (slice_h >> 1)*p_stride + (slice_w >> 1); |
271 | 2.27k | s->buffer += s->padding; |
272 | | |
273 | 2.27k | return 0; |
274 | 2.27k | } |
275 | | |
276 | | av_cold void ff_vc2enc_free_transforms(VC2TransformContext *s) |
277 | 2.33k | { |
278 | 2.33k | if (s->buffer) { |
279 | 2.27k | av_free(s->buffer - s->padding); |
280 | | s->buffer = NULL; |
281 | 2.27k | } |
282 | 2.33k | } |