/src/freeimage-svn/FreeImage/trunk/Source/LibOpenJPEG/dwt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium |
3 | | * Copyright (c) 2002-2007, Professor Benoit Macq |
4 | | * Copyright (c) 2001-2003, David Janssens |
5 | | * Copyright (c) 2002-2003, Yannick Verschueren |
6 | | * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe |
7 | | * Copyright (c) 2005, Herve Drolon, FreeImage Team |
8 | | * Copyright (c) 2007, Jonathan Ballard <dzonatas@dzonux.net> |
9 | | * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com> |
10 | | * All rights reserved. |
11 | | * |
12 | | * Redistribution and use in source and binary forms, with or without |
13 | | * modification, are permitted provided that the following conditions |
14 | | * are met: |
15 | | * 1. Redistributions of source code must retain the above copyright |
16 | | * notice, this list of conditions and the following disclaimer. |
17 | | * 2. Redistributions in binary form must reproduce the above copyright |
18 | | * notice, this list of conditions and the following disclaimer in the |
19 | | * documentation and/or other materials provided with the distribution. |
20 | | * |
21 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' |
22 | | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
23 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
24 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
25 | | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26 | | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31 | | * POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #ifdef __SSE__ |
35 | | #include <xmmintrin.h> |
36 | | #endif |
37 | | |
38 | | #include "opj_includes.h" |
39 | | |
40 | | /** @defgroup DWT DWT - Implementation of a discrete wavelet transform */ |
41 | | /*@{*/ |
42 | | |
43 | | #define OPJ_WS(i) v->mem[(i)*2] |
44 | | #define OPJ_WD(i) v->mem[(1+(i)*2)] |
45 | | |
46 | | /** @name Local data structures */ |
47 | | /*@{*/ |
48 | | |
49 | | typedef struct dwt_local { |
50 | | OPJ_INT32* mem; |
51 | | OPJ_INT32 dn; |
52 | | OPJ_INT32 sn; |
53 | | OPJ_INT32 cas; |
54 | | } opj_dwt_t; |
55 | | |
56 | | typedef union { |
57 | | OPJ_FLOAT32 f[4]; |
58 | | } opj_v4_t; |
59 | | |
60 | | typedef struct v4dwt_local { |
61 | | opj_v4_t* wavelet ; |
62 | | OPJ_INT32 dn ; |
63 | | OPJ_INT32 sn ; |
64 | | OPJ_INT32 cas ; |
65 | | } opj_v4dwt_t ; |
66 | | |
67 | | static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */ |
68 | | static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */ |
69 | | static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */ |
70 | | static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */ |
71 | | |
72 | | static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */ |
73 | | static const OPJ_FLOAT32 opj_c13318 = 1.625732422f; |
74 | | |
75 | | /*@}*/ |
76 | | |
77 | | /** |
78 | | Virtual function type for wavelet transform in 1-D |
79 | | */ |
80 | | typedef void (*DWT1DFN)(opj_dwt_t* v); |
81 | | |
82 | | /** @name Local static functions */ |
83 | | /*@{*/ |
84 | | |
85 | | /** |
86 | | Forward lazy transform (horizontal) |
87 | | */ |
88 | | static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); |
89 | | /** |
90 | | Forward lazy transform (vertical) |
91 | | */ |
92 | | static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas); |
93 | | /** |
94 | | Inverse lazy transform (horizontal) |
95 | | */ |
96 | | static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a); |
97 | | /** |
98 | | Inverse lazy transform (vertical) |
99 | | */ |
100 | | static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x); |
101 | | /** |
102 | | Forward 5-3 wavelet transform in 1-D |
103 | | */ |
104 | | static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); |
105 | | /** |
106 | | Inverse 5-3 wavelet transform in 1-D |
107 | | */ |
108 | | static void opj_dwt_decode_1(opj_dwt_t *v); |
109 | | static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); |
110 | | /** |
111 | | Forward 9-7 wavelet transform in 1-D |
112 | | */ |
113 | | static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); |
114 | | /** |
115 | | Explicit calculation of the Quantization Stepsizes |
116 | | */ |
117 | | static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize); |
118 | | /** |
119 | | Inverse wavelet transform in 2-D. |
120 | | */ |
121 | | static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); |
122 | | |
123 | | static OPJ_BOOL opj_dwt_encode_procedure( opj_tcd_tilecomp_t * tilec, |
124 | | void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ); |
125 | | |
126 | | static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i); |
127 | | |
128 | | /* <summary> */ |
129 | | /* Inverse 9-7 wavelet transform in 1-D. */ |
130 | | /* </summary> */ |
131 | | static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); |
132 | | |
133 | | static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); |
134 | | |
135 | | static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); |
136 | | |
137 | | #ifdef __SSE__ |
138 | | static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); |
139 | | |
140 | | static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); |
141 | | |
142 | | #else |
143 | | static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); |
144 | | |
145 | | static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); |
146 | | |
147 | | #endif |
148 | | |
149 | | /*@}*/ |
150 | | |
151 | | /*@}*/ |
152 | | |
153 | 0 | #define OPJ_S(i) a[(i)*2] |
154 | 0 | #define OPJ_D(i) a[(1+(i)*2)] |
155 | 0 | #define OPJ_S_(i) ((i)<0?OPJ_S(0):((i)>=sn?OPJ_S(sn-1):OPJ_S(i))) |
156 | 0 | #define OPJ_D_(i) ((i)<0?OPJ_D(0):((i)>=dn?OPJ_D(dn-1):OPJ_D(i))) |
157 | | /* new */ |
158 | 0 | #define OPJ_SS_(i) ((i)<0?OPJ_S(0):((i)>=dn?OPJ_S(dn-1):OPJ_S(i))) |
159 | 0 | #define OPJ_DD_(i) ((i)<0?OPJ_D(0):((i)>=sn?OPJ_D(sn-1):OPJ_D(i))) |
160 | | |
161 | | /* <summary> */ |
162 | | /* This table contains the norms of the 5-3 wavelets for different bands. */ |
163 | | /* </summary> */ |
164 | | static const OPJ_FLOAT64 opj_dwt_norms[4][10] = { |
165 | | {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3}, |
166 | | {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, |
167 | | {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, |
168 | | {.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93} |
169 | | }; |
170 | | |
171 | | /* <summary> */ |
172 | | /* This table contains the norms of the 9-7 wavelets for different bands. */ |
173 | | /* </summary> */ |
174 | | static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { |
175 | | {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9}, |
176 | | {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, |
177 | | {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, |
178 | | {2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2} |
179 | | }; |
180 | | |
181 | | /* |
182 | | ========================================================== |
183 | | local functions |
184 | | ========================================================== |
185 | | */ |
186 | | |
187 | | /* <summary> */ |
188 | | /* Forward lazy transform (horizontal). */ |
189 | | /* </summary> */ |
190 | 0 | void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { |
191 | 0 | OPJ_INT32 i; |
192 | 0 | OPJ_INT32 * l_dest = b; |
193 | 0 | OPJ_INT32 * l_src = a+cas; |
194 | |
|
195 | 0 | for (i=0; i<sn; ++i) { |
196 | 0 | *l_dest++ = *l_src; |
197 | 0 | l_src += 2; |
198 | 0 | } |
199 | | |
200 | 0 | l_dest = b + sn; |
201 | 0 | l_src = a + 1 - cas; |
202 | |
|
203 | 0 | for (i=0; i<dn; ++i) { |
204 | 0 | *l_dest++=*l_src; |
205 | 0 | l_src += 2; |
206 | 0 | } |
207 | 0 | } |
208 | | |
209 | | /* <summary> */ |
210 | | /* Forward lazy transform (vertical). */ |
211 | | /* </summary> */ |
212 | 0 | void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) { |
213 | 0 | OPJ_INT32 i = sn; |
214 | 0 | OPJ_INT32 * l_dest = b; |
215 | 0 | OPJ_INT32 * l_src = a+cas; |
216 | |
|
217 | 0 | while (i--) { |
218 | 0 | *l_dest = *l_src; |
219 | 0 | l_dest += x; |
220 | 0 | l_src += 2; |
221 | 0 | } /* b[i*x]=a[2*i+cas]; */ |
222 | |
|
223 | 0 | l_dest = b + sn * x; |
224 | 0 | l_src = a + 1 - cas; |
225 | | |
226 | 0 | i = dn; |
227 | 0 | while (i--) { |
228 | 0 | *l_dest = *l_src; |
229 | 0 | l_dest += x; |
230 | 0 | l_src += 2; |
231 | 0 | } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ |
232 | 0 | } |
233 | | |
234 | | /* <summary> */ |
235 | | /* Inverse lazy transform (horizontal). */ |
236 | | /* </summary> */ |
237 | 0 | void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a) { |
238 | 0 | OPJ_INT32 *ai = a; |
239 | 0 | OPJ_INT32 *bi = h->mem + h->cas; |
240 | 0 | OPJ_INT32 i = h->sn; |
241 | 0 | while( i-- ) { |
242 | 0 | *bi = *(ai++); |
243 | 0 | bi += 2; |
244 | 0 | } |
245 | 0 | ai = a + h->sn; |
246 | 0 | bi = h->mem + 1 - h->cas; |
247 | 0 | i = h->dn ; |
248 | 0 | while( i-- ) { |
249 | 0 | *bi = *(ai++); |
250 | 0 | bi += 2; |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | /* <summary> */ |
255 | | /* Inverse lazy transform (vertical). */ |
256 | | /* </summary> */ |
257 | 0 | void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) { |
258 | 0 | OPJ_INT32 *ai = a; |
259 | 0 | OPJ_INT32 *bi = v->mem + v->cas; |
260 | 0 | OPJ_INT32 i = v->sn; |
261 | 0 | while( i-- ) { |
262 | 0 | *bi = *ai; |
263 | 0 | bi += 2; |
264 | 0 | ai += x; |
265 | 0 | } |
266 | 0 | ai = a + (v->sn * x); |
267 | 0 | bi = v->mem + 1 - v->cas; |
268 | 0 | i = v->dn ; |
269 | 0 | while( i-- ) { |
270 | 0 | *bi = *ai; |
271 | 0 | bi += 2; |
272 | 0 | ai += x; |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | | |
277 | | /* <summary> */ |
278 | | /* Forward 5-3 wavelet transform in 1-D. */ |
279 | | /* </summary> */ |
280 | 0 | void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { |
281 | 0 | OPJ_INT32 i; |
282 | | |
283 | 0 | if (!cas) { |
284 | 0 | if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ |
285 | 0 | for (i = 0; i < dn; i++) OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; |
286 | 0 | for (i = 0; i < sn; i++) OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; |
287 | 0 | } |
288 | 0 | } else { |
289 | 0 | if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ |
290 | 0 | OPJ_S(0) *= 2; |
291 | 0 | else { |
292 | 0 | for (i = 0; i < dn; i++) OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; |
293 | 0 | for (i = 0; i < sn; i++) OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; |
294 | 0 | } |
295 | 0 | } |
296 | 0 | } |
297 | | |
298 | | /* <summary> */ |
299 | | /* Inverse 5-3 wavelet transform in 1-D. */ |
300 | | /* </summary> */ |
301 | 0 | void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { |
302 | 0 | OPJ_INT32 i; |
303 | | |
304 | 0 | if (!cas) { |
305 | 0 | if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ |
306 | 0 | for (i = 0; i < sn; i++) OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; |
307 | 0 | for (i = 0; i < dn; i++) OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; |
308 | 0 | } |
309 | 0 | } else { |
310 | 0 | if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ |
311 | 0 | OPJ_S(0) /= 2; |
312 | 0 | else { |
313 | 0 | for (i = 0; i < sn; i++) OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; |
314 | 0 | for (i = 0; i < dn; i++) OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | | /* <summary> */ |
320 | | /* Inverse 5-3 wavelet transform in 1-D. */ |
321 | | /* </summary> */ |
322 | 0 | void opj_dwt_decode_1(opj_dwt_t *v) { |
323 | 0 | opj_dwt_decode_1_(v->mem, v->dn, v->sn, v->cas); |
324 | 0 | } |
325 | | |
326 | | /* <summary> */ |
327 | | /* Forward 9-7 wavelet transform in 1-D. */ |
328 | | /* </summary> */ |
329 | 0 | void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { |
330 | 0 | OPJ_INT32 i; |
331 | 0 | if (!cas) { |
332 | 0 | if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ |
333 | 0 | for (i = 0; i < dn; i++) |
334 | 0 | OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); |
335 | 0 | for (i = 0; i < sn; i++) |
336 | 0 | OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); |
337 | 0 | for (i = 0; i < dn; i++) |
338 | 0 | OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); |
339 | 0 | for (i = 0; i < sn; i++) |
340 | 0 | OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); |
341 | 0 | for (i = 0; i < dn; i++) |
342 | 0 | OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ |
343 | 0 | for (i = 0; i < sn; i++) |
344 | 0 | OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ |
345 | 0 | } |
346 | 0 | } else { |
347 | 0 | if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ |
348 | 0 | for (i = 0; i < dn; i++) |
349 | 0 | OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); |
350 | 0 | for (i = 0; i < sn; i++) |
351 | 0 | OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); |
352 | 0 | for (i = 0; i < dn; i++) |
353 | 0 | OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); |
354 | 0 | for (i = 0; i < sn; i++) |
355 | 0 | OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); |
356 | 0 | for (i = 0; i < dn; i++) |
357 | 0 | OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ |
358 | 0 | for (i = 0; i < sn; i++) |
359 | 0 | OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ |
360 | 0 | } |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | 0 | void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize) { |
365 | 0 | OPJ_INT32 p, n; |
366 | 0 | p = opj_int_floorlog2(stepsize) - 13; |
367 | 0 | n = 11 - opj_int_floorlog2(stepsize); |
368 | 0 | bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; |
369 | 0 | bandno_stepsize->expn = numbps - p; |
370 | 0 | } |
371 | | |
372 | | /* |
373 | | ========================================================== |
374 | | DWT interface |
375 | | ========================================================== |
376 | | */ |
377 | | |
378 | | |
379 | | /* <summary> */ |
380 | | /* Forward 5-3 wavelet transform in 2-D. */ |
381 | | /* </summary> */ |
382 | | INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ) |
383 | 0 | { |
384 | 0 | OPJ_INT32 i, j, k; |
385 | 0 | OPJ_INT32 *a = 00; |
386 | 0 | OPJ_INT32 *aj = 00; |
387 | 0 | OPJ_INT32 *bj = 00; |
388 | 0 | OPJ_INT32 w, l; |
389 | |
|
390 | 0 | OPJ_INT32 rw; /* width of the resolution level computed */ |
391 | 0 | OPJ_INT32 rh; /* height of the resolution level computed */ |
392 | 0 | OPJ_UINT32 l_data_size; |
393 | |
|
394 | 0 | opj_tcd_resolution_t * l_cur_res = 0; |
395 | 0 | opj_tcd_resolution_t * l_last_res = 0; |
396 | |
|
397 | 0 | w = tilec->x1-tilec->x0; |
398 | 0 | l = (OPJ_INT32)tilec->numresolutions-1; |
399 | 0 | a = tilec->data; |
400 | |
|
401 | 0 | l_cur_res = tilec->resolutions + l; |
402 | 0 | l_last_res = l_cur_res - 1; |
403 | |
|
404 | 0 | l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32); |
405 | 0 | bj = (OPJ_INT32*)opj_malloc((size_t)l_data_size); |
406 | 0 | if (! bj) { |
407 | 0 | return OPJ_FALSE; |
408 | 0 | } |
409 | 0 | i = l; |
410 | |
|
411 | 0 | while (i--) { |
412 | 0 | OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ |
413 | 0 | OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ |
414 | 0 | OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ |
415 | 0 | OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ |
416 | 0 | OPJ_INT32 dn, sn; |
417 | |
|
418 | 0 | rw = l_cur_res->x1 - l_cur_res->x0; |
419 | 0 | rh = l_cur_res->y1 - l_cur_res->y0; |
420 | 0 | rw1 = l_last_res->x1 - l_last_res->x0; |
421 | 0 | rh1 = l_last_res->y1 - l_last_res->y0; |
422 | |
|
423 | 0 | cas_row = l_cur_res->x0 & 1; |
424 | 0 | cas_col = l_cur_res->y0 & 1; |
425 | |
|
426 | 0 | sn = rh1; |
427 | 0 | dn = rh - rh1; |
428 | 0 | for (j = 0; j < rw; ++j) { |
429 | 0 | aj = a + j; |
430 | 0 | for (k = 0; k < rh; ++k) { |
431 | 0 | bj[k] = aj[k*w]; |
432 | 0 | } |
433 | |
|
434 | 0 | (*p_function) (bj, dn, sn, cas_col); |
435 | |
|
436 | 0 | opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); |
437 | 0 | } |
438 | |
|
439 | 0 | sn = rw1; |
440 | 0 | dn = rw - rw1; |
441 | |
|
442 | 0 | for (j = 0; j < rh; j++) { |
443 | 0 | aj = a + j * w; |
444 | 0 | for (k = 0; k < rw; k++) bj[k] = aj[k]; |
445 | 0 | (*p_function) (bj, dn, sn, cas_row); |
446 | 0 | opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); |
447 | 0 | } |
448 | |
|
449 | 0 | l_cur_res = l_last_res; |
450 | |
|
451 | 0 | --l_last_res; |
452 | 0 | } |
453 | |
|
454 | 0 | opj_free(bj); |
455 | 0 | return OPJ_TRUE; |
456 | 0 | } |
457 | | |
458 | | /* Forward 5-3 wavelet transform in 2-D. */ |
459 | | /* </summary> */ |
460 | | OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) |
461 | 0 | { |
462 | 0 | return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1); |
463 | 0 | } |
464 | | |
465 | | /* <summary> */ |
466 | | /* Inverse 5-3 wavelet transform in 2-D. */ |
467 | | /* </summary> */ |
468 | 0 | OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { |
469 | 0 | return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1); |
470 | 0 | } |
471 | | |
472 | | |
473 | | /* <summary> */ |
474 | | /* Get gain of 5-3 wavelet transform. */ |
475 | | /* </summary> */ |
476 | 0 | OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) { |
477 | 0 | if (orient == 0) |
478 | 0 | return 0; |
479 | 0 | if (orient == 1 || orient == 2) |
480 | 0 | return 1; |
481 | 0 | return 2; |
482 | 0 | } |
483 | | |
484 | | /* <summary> */ |
485 | | /* Get norm of 5-3 wavelet. */ |
486 | | /* </summary> */ |
487 | 0 | OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) { |
488 | 0 | return opj_dwt_norms[orient][level]; |
489 | 0 | } |
490 | | |
491 | | /* <summary> */ |
492 | | /* Forward 9-7 wavelet transform in 2-D. */ |
493 | | /* </summary> */ |
494 | | OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) |
495 | 0 | { |
496 | 0 | return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1_real); |
497 | 0 | } |
498 | | |
499 | | /* <summary> */ |
500 | | /* Get gain of 9-7 wavelet transform. */ |
501 | | /* </summary> */ |
502 | 0 | OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) { |
503 | 0 | (void)orient; |
504 | 0 | return 0; |
505 | 0 | } |
506 | | |
507 | | /* <summary> */ |
508 | | /* Get norm of 9-7 wavelet. */ |
509 | | /* </summary> */ |
510 | 0 | OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) { |
511 | 0 | return opj_dwt_norms_real[orient][level]; |
512 | 0 | } |
513 | | |
514 | 0 | void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { |
515 | 0 | OPJ_UINT32 numbands, bandno; |
516 | 0 | numbands = 3 * tccp->numresolutions - 2; |
517 | 0 | for (bandno = 0; bandno < numbands; bandno++) { |
518 | 0 | OPJ_FLOAT64 stepsize; |
519 | 0 | OPJ_UINT32 resno, level, orient, gain; |
520 | |
|
521 | 0 | resno = (bandno == 0) ? 0 : ((bandno - 1) / 3 + 1); |
522 | 0 | orient = (bandno == 0) ? 0 : ((bandno - 1) % 3 + 1); |
523 | 0 | level = tccp->numresolutions - 1 - resno; |
524 | 0 | gain = (tccp->qmfbid == 0) ? 0 : ((orient == 0) ? 0 : (((orient == 1) || (orient == 2)) ? 1 : 2)); |
525 | 0 | if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { |
526 | 0 | stepsize = 1.0; |
527 | 0 | } else { |
528 | 0 | OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; |
529 | 0 | stepsize = (1 << (gain)) / norm; |
530 | 0 | } |
531 | 0 | opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]); |
532 | 0 | } |
533 | 0 | } |
534 | | |
535 | | /* <summary> */ |
536 | | /* Determine maximum computed resolution level for inverse wavelet transform */ |
537 | | /* </summary> */ |
538 | 0 | OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) { |
539 | 0 | OPJ_UINT32 mr = 0; |
540 | 0 | OPJ_UINT32 w; |
541 | 0 | while( --i ) { |
542 | 0 | ++r; |
543 | 0 | if( mr < ( w = (OPJ_UINT32)(r->x1 - r->x0) ) ) |
544 | 0 | mr = w ; |
545 | 0 | if( mr < ( w = (OPJ_UINT32)(r->y1 - r->y0) ) ) |
546 | 0 | mr = w ; |
547 | 0 | } |
548 | 0 | return mr ; |
549 | 0 | } |
550 | | |
551 | | /* <summary> */ |
552 | | /* Inverse wavelet transform in 2-D. */ |
553 | | /* </summary> */ |
554 | 0 | OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { |
555 | 0 | opj_dwt_t h; |
556 | 0 | opj_dwt_t v; |
557 | |
|
558 | 0 | opj_tcd_resolution_t* tr = tilec->resolutions; |
559 | |
|
560 | 0 | OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0); /* width of the resolution level computed */ |
561 | 0 | OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ |
562 | |
|
563 | 0 | OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); |
564 | |
|
565 | 0 | h.mem = (OPJ_INT32*) |
566 | 0 | opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); |
567 | 0 | if (! h.mem){ |
568 | 0 | return OPJ_FALSE; |
569 | 0 | } |
570 | | |
571 | 0 | v.mem = h.mem; |
572 | |
|
573 | 0 | while( --numres) { |
574 | 0 | OPJ_INT32 * restrict tiledp = tilec->data; |
575 | 0 | OPJ_UINT32 j; |
576 | |
|
577 | 0 | ++tr; |
578 | 0 | h.sn = (OPJ_INT32)rw; |
579 | 0 | v.sn = (OPJ_INT32)rh; |
580 | |
|
581 | 0 | rw = (OPJ_UINT32)(tr->x1 - tr->x0); |
582 | 0 | rh = (OPJ_UINT32)(tr->y1 - tr->y0); |
583 | |
|
584 | 0 | h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); |
585 | 0 | h.cas = tr->x0 % 2; |
586 | |
|
587 | 0 | for(j = 0; j < rh; ++j) { |
588 | 0 | opj_dwt_interleave_h(&h, &tiledp[j*w]); |
589 | 0 | (dwt_1D)(&h); |
590 | 0 | memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); |
591 | 0 | } |
592 | |
|
593 | 0 | v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); |
594 | 0 | v.cas = tr->y0 % 2; |
595 | |
|
596 | 0 | for(j = 0; j < rw; ++j){ |
597 | 0 | OPJ_UINT32 k; |
598 | 0 | opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); |
599 | 0 | (dwt_1D)(&v); |
600 | 0 | for(k = 0; k < rh; ++k) { |
601 | 0 | tiledp[k * w + j] = v.mem[k]; |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } |
605 | 0 | opj_aligned_free(h.mem); |
606 | 0 | return OPJ_TRUE; |
607 | 0 | } |
608 | | |
609 | 0 | void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){ |
610 | 0 | OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); |
611 | 0 | OPJ_INT32 count = w->sn; |
612 | 0 | OPJ_INT32 i, k; |
613 | |
|
614 | 0 | for(k = 0; k < 2; ++k){ |
615 | 0 | if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) { |
616 | | /* Fast code path */ |
617 | 0 | for(i = 0; i < count; ++i){ |
618 | 0 | OPJ_INT32 j = i; |
619 | 0 | bi[i*8 ] = a[j]; |
620 | 0 | j += x; |
621 | 0 | bi[i*8 + 1] = a[j]; |
622 | 0 | j += x; |
623 | 0 | bi[i*8 + 2] = a[j]; |
624 | 0 | j += x; |
625 | 0 | bi[i*8 + 3] = a[j]; |
626 | 0 | } |
627 | 0 | } |
628 | 0 | else { |
629 | | /* Slow code path */ |
630 | 0 | for(i = 0; i < count; ++i){ |
631 | 0 | OPJ_INT32 j = i; |
632 | 0 | bi[i*8 ] = a[j]; |
633 | 0 | j += x; |
634 | 0 | if(j >= size) continue; |
635 | 0 | bi[i*8 + 1] = a[j]; |
636 | 0 | j += x; |
637 | 0 | if(j >= size) continue; |
638 | 0 | bi[i*8 + 2] = a[j]; |
639 | 0 | j += x; |
640 | 0 | if(j >= size) continue; |
641 | 0 | bi[i*8 + 3] = a[j]; /* This one*/ |
642 | 0 | } |
643 | 0 | } |
644 | |
|
645 | 0 | bi = (OPJ_FLOAT32*) (w->wavelet + 1 - w->cas); |
646 | 0 | a += w->sn; |
647 | 0 | size -= w->sn; |
648 | 0 | count = w->dn; |
649 | 0 | } |
650 | 0 | } |
651 | | |
652 | 0 | void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ |
653 | 0 | opj_v4_t* restrict bi = v->wavelet + v->cas; |
654 | 0 | OPJ_INT32 i; |
655 | |
|
656 | 0 | for(i = 0; i < v->sn; ++i){ |
657 | 0 | memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); |
658 | 0 | } |
659 | |
|
660 | 0 | a += v->sn * x; |
661 | 0 | bi = v->wavelet + 1 - v->cas; |
662 | |
|
663 | 0 | for(i = 0; i < v->dn; ++i){ |
664 | 0 | memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); |
665 | 0 | } |
666 | 0 | } |
667 | | |
668 | | #ifdef __SSE__ |
669 | | |
670 | 0 | void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ |
671 | 0 | __m128* restrict vw = (__m128*) w; |
672 | 0 | OPJ_INT32 i; |
673 | | /* 4x unrolled loop */ |
674 | 0 | for(i = 0; i < count >> 2; ++i){ |
675 | 0 | *vw = _mm_mul_ps(*vw, c); |
676 | 0 | vw += 2; |
677 | 0 | *vw = _mm_mul_ps(*vw, c); |
678 | 0 | vw += 2; |
679 | 0 | *vw = _mm_mul_ps(*vw, c); |
680 | 0 | vw += 2; |
681 | 0 | *vw = _mm_mul_ps(*vw, c); |
682 | 0 | vw += 2; |
683 | 0 | } |
684 | 0 | count &= 3; |
685 | 0 | for(i = 0; i < count; ++i){ |
686 | 0 | *vw = _mm_mul_ps(*vw, c); |
687 | 0 | vw += 2; |
688 | 0 | } |
689 | 0 | } |
690 | | |
691 | 0 | void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){ |
692 | 0 | __m128* restrict vl = (__m128*) l; |
693 | 0 | __m128* restrict vw = (__m128*) w; |
694 | 0 | OPJ_INT32 i; |
695 | 0 | __m128 tmp1, tmp2, tmp3; |
696 | 0 | tmp1 = vl[0]; |
697 | 0 | for(i = 0; i < m; ++i){ |
698 | 0 | tmp2 = vw[-1]; |
699 | 0 | tmp3 = vw[ 0]; |
700 | 0 | vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); |
701 | 0 | tmp1 = tmp3; |
702 | 0 | vw += 2; |
703 | 0 | } |
704 | 0 | vl = vw - 2; |
705 | 0 | if(m >= k){ |
706 | 0 | return; |
707 | 0 | } |
708 | 0 | c = _mm_add_ps(c, c); |
709 | 0 | c = _mm_mul_ps(c, vl[0]); |
710 | 0 | for(; m < k; ++m){ |
711 | 0 | __m128 tmp = vw[-1]; |
712 | 0 | vw[-1] = _mm_add_ps(tmp, c); |
713 | 0 | vw += 2; |
714 | 0 | } |
715 | 0 | } |
716 | | |
717 | | #else |
718 | | |
719 | | void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c) |
720 | | { |
721 | | OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; |
722 | | OPJ_INT32 i; |
723 | | for(i = 0; i < count; ++i){ |
724 | | OPJ_FLOAT32 tmp1 = fw[i*8 ]; |
725 | | OPJ_FLOAT32 tmp2 = fw[i*8 + 1]; |
726 | | OPJ_FLOAT32 tmp3 = fw[i*8 + 2]; |
727 | | OPJ_FLOAT32 tmp4 = fw[i*8 + 3]; |
728 | | fw[i*8 ] = tmp1 * c; |
729 | | fw[i*8 + 1] = tmp2 * c; |
730 | | fw[i*8 + 2] = tmp3 * c; |
731 | | fw[i*8 + 3] = tmp4 * c; |
732 | | } |
733 | | } |
734 | | |
735 | | void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c) |
736 | | { |
737 | | OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l; |
738 | | OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; |
739 | | OPJ_INT32 i; |
740 | | for(i = 0; i < m; ++i){ |
741 | | OPJ_FLOAT32 tmp1_1 = fl[0]; |
742 | | OPJ_FLOAT32 tmp1_2 = fl[1]; |
743 | | OPJ_FLOAT32 tmp1_3 = fl[2]; |
744 | | OPJ_FLOAT32 tmp1_4 = fl[3]; |
745 | | OPJ_FLOAT32 tmp2_1 = fw[-4]; |
746 | | OPJ_FLOAT32 tmp2_2 = fw[-3]; |
747 | | OPJ_FLOAT32 tmp2_3 = fw[-2]; |
748 | | OPJ_FLOAT32 tmp2_4 = fw[-1]; |
749 | | OPJ_FLOAT32 tmp3_1 = fw[0]; |
750 | | OPJ_FLOAT32 tmp3_2 = fw[1]; |
751 | | OPJ_FLOAT32 tmp3_3 = fw[2]; |
752 | | OPJ_FLOAT32 tmp3_4 = fw[3]; |
753 | | fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); |
754 | | fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); |
755 | | fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); |
756 | | fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); |
757 | | fl = fw; |
758 | | fw += 8; |
759 | | } |
760 | | if(m < k){ |
761 | | OPJ_FLOAT32 c1; |
762 | | OPJ_FLOAT32 c2; |
763 | | OPJ_FLOAT32 c3; |
764 | | OPJ_FLOAT32 c4; |
765 | | c += c; |
766 | | c1 = fl[0] * c; |
767 | | c2 = fl[1] * c; |
768 | | c3 = fl[2] * c; |
769 | | c4 = fl[3] * c; |
770 | | for(; m < k; ++m){ |
771 | | OPJ_FLOAT32 tmp1 = fw[-4]; |
772 | | OPJ_FLOAT32 tmp2 = fw[-3]; |
773 | | OPJ_FLOAT32 tmp3 = fw[-2]; |
774 | | OPJ_FLOAT32 tmp4 = fw[-1]; |
775 | | fw[-4] = tmp1 + c1; |
776 | | fw[-3] = tmp2 + c2; |
777 | | fw[-2] = tmp3 + c3; |
778 | | fw[-1] = tmp4 + c4; |
779 | | fw += 8; |
780 | | } |
781 | | } |
782 | | } |
783 | | |
784 | | #endif |
785 | | |
786 | | /* <summary> */ |
787 | | /* Inverse 9-7 wavelet transform in 1-D. */ |
788 | | /* </summary> */ |
789 | | void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt) |
790 | 0 | { |
791 | 0 | OPJ_INT32 a, b; |
792 | 0 | if(dwt->cas == 0) { |
793 | 0 | if(!((dwt->dn > 0) || (dwt->sn > 1))){ |
794 | 0 | return; |
795 | 0 | } |
796 | 0 | a = 0; |
797 | 0 | b = 1; |
798 | 0 | }else{ |
799 | 0 | if(!((dwt->sn > 0) || (dwt->dn > 1))) { |
800 | 0 | return; |
801 | 0 | } |
802 | 0 | a = 1; |
803 | 0 | b = 0; |
804 | 0 | } |
805 | 0 | #ifdef __SSE__ |
806 | 0 | opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); |
807 | 0 | opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); |
808 | 0 | opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); |
809 | 0 | opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); |
810 | 0 | opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); |
811 | 0 | opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); |
812 | | #else |
813 | | opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); |
814 | | opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); |
815 | | opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); |
816 | | opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma); |
817 | | opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_beta); |
818 | | opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha); |
819 | | #endif |
820 | 0 | } |
821 | | |
822 | | |
823 | | /* <summary> */ |
824 | | /* Inverse 9-7 wavelet transform in 2-D. */ |
825 | | /* </summary> */ |
826 | | OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres) |
827 | 0 | { |
828 | 0 | opj_v4dwt_t h; |
829 | 0 | opj_v4dwt_t v; |
830 | |
|
831 | 0 | opj_tcd_resolution_t* res = tilec->resolutions; |
832 | |
|
833 | 0 | OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ |
834 | 0 | OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ |
835 | |
|
836 | 0 | OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); |
837 | |
|
838 | 0 | h.wavelet = (opj_v4_t*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(opj_v4_t)); |
839 | 0 | v.wavelet = h.wavelet; |
840 | |
|
841 | 0 | while( --numres) { |
842 | 0 | OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data; |
843 | 0 | OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0)); |
844 | 0 | OPJ_INT32 j; |
845 | |
|
846 | 0 | h.sn = (OPJ_INT32)rw; |
847 | 0 | v.sn = (OPJ_INT32)rh; |
848 | |
|
849 | 0 | ++res; |
850 | |
|
851 | 0 | rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ |
852 | 0 | rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ |
853 | |
|
854 | 0 | h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); |
855 | 0 | h.cas = res->x0 % 2; |
856 | |
|
857 | 0 | for(j = (OPJ_INT32)rh; j > 3; j -= 4) { |
858 | 0 | OPJ_INT32 k; |
859 | 0 | opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); |
860 | 0 | opj_v4dwt_decode(&h); |
861 | |
|
862 | 0 | for(k = (OPJ_INT32)rw; --k >= 0;){ |
863 | 0 | aj[k ] = h.wavelet[k].f[0]; |
864 | 0 | aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; |
865 | 0 | aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; |
866 | 0 | aj[k+(OPJ_INT32)w*3] = h.wavelet[k].f[3]; |
867 | 0 | } |
868 | |
|
869 | 0 | aj += w*4; |
870 | 0 | bufsize -= w*4; |
871 | 0 | } |
872 | |
|
873 | 0 | if (rh & 0x03) { |
874 | 0 | OPJ_INT32 k; |
875 | 0 | j = rh & 0x03; |
876 | 0 | opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); |
877 | 0 | opj_v4dwt_decode(&h); |
878 | 0 | for(k = (OPJ_INT32)rw; --k >= 0;){ |
879 | 0 | switch(j) { |
880 | 0 | case 3: aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; |
881 | 0 | case 2: aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; |
882 | 0 | case 1: aj[k ] = h.wavelet[k].f[0]; |
883 | 0 | } |
884 | 0 | } |
885 | 0 | } |
886 | |
|
887 | 0 | v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); |
888 | 0 | v.cas = res->y0 % 2; |
889 | |
|
890 | 0 | aj = (OPJ_FLOAT32*) tilec->data; |
891 | 0 | for(j = (OPJ_INT32)rw; j > 3; j -= 4){ |
892 | 0 | OPJ_UINT32 k; |
893 | |
|
894 | 0 | opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, 4); |
895 | 0 | opj_v4dwt_decode(&v); |
896 | |
|
897 | 0 | for(k = 0; k < rh; ++k){ |
898 | 0 | memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); |
899 | 0 | } |
900 | 0 | aj += 4; |
901 | 0 | } |
902 | |
|
903 | 0 | if (rw & 0x03){ |
904 | 0 | OPJ_UINT32 k; |
905 | |
|
906 | 0 | j = rw & 0x03; |
907 | |
|
908 | 0 | opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, j); |
909 | 0 | opj_v4dwt_decode(&v); |
910 | |
|
911 | 0 | for(k = 0; k < rh; ++k){ |
912 | 0 | memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); |
913 | 0 | } |
914 | 0 | } |
915 | 0 | } |
916 | |
|
917 | 0 | opj_aligned_free(h.wavelet); |
918 | 0 | return OPJ_TRUE; |
919 | 0 | } |