/src/opencv/3rdparty/openjpeg/openjp2/t1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * The copyright in this software is being made available under the 2-clauses |
3 | | * BSD License, included below. This software may be subject to other third |
4 | | * party and contributor rights, including patent rights, and no such rights |
5 | | * are granted under this license. |
6 | | * |
7 | | * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 | | * Copyright (c) 2002-2014, Professor Benoit Macq |
9 | | * Copyright (c) 2001-2003, David Janssens |
10 | | * Copyright (c) 2002-2003, Yannick Verschueren |
11 | | * Copyright (c) 2003-2007, Francois-Olivier Devaux |
12 | | * Copyright (c) 2003-2014, Antonin Descampe |
13 | | * Copyright (c) 2005, Herve Drolon, FreeImage Team |
14 | | * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com> |
15 | | * Copyright (c) 2012, Carl Hetherington |
16 | | * Copyright (c) 2017, IntoPIX SA <support@intopix.com> |
17 | | * All rights reserved. |
18 | | * |
19 | | * Redistribution and use in source and binary forms, with or without |
20 | | * modification, are permitted provided that the following conditions |
21 | | * are met: |
22 | | * 1. Redistributions of source code must retain the above copyright |
23 | | * notice, this list of conditions and the following disclaimer. |
24 | | * 2. Redistributions in binary form must reproduce the above copyright |
25 | | * notice, this list of conditions and the following disclaimer in the |
26 | | * documentation and/or other materials provided with the distribution. |
27 | | * |
28 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' |
29 | | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
30 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
31 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
32 | | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
33 | | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
34 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
35 | | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
36 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | * POSSIBILITY OF SUCH DAMAGE. |
39 | | */ |
40 | | |
41 | | #define OPJ_SKIP_POISON |
42 | | #include "opj_includes.h" |
43 | | |
44 | | #ifdef __SSE__ |
45 | | #include <xmmintrin.h> |
46 | | #endif |
47 | | #ifdef __SSE2__ |
48 | | #include <emmintrin.h> |
49 | | #endif |
50 | | #if (defined(__AVX2__) || defined(__AVX512F__)) |
51 | | #include <immintrin.h> |
52 | | #endif |
53 | | |
54 | | #if defined(__GNUC__) |
55 | | #pragma GCC poison malloc calloc realloc free |
56 | | #endif |
57 | | |
58 | | #include "t1_luts.h" |
59 | | |
60 | | /** @defgroup T1 T1 - Implementation of the tier-1 coding */ |
61 | | /*@{*/ |
62 | | |
/* Flags word covering sample (x, y). One word covers four consecutive rows of
 * a column (hence y / 4); the "+ 1" terms and the row stride of t1->w + 2 skip
 * a one-entry border around the array. Relies on a variable named `t1` being
 * in scope at the point of use. Arguments are parenthesized so that expression
 * arguments such as `row + 4` index the intended word. */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
64 | | |
/* Point `curctx` at context number `ctxno` of the MQ coder. Relies on a
 * variable named `mqc` being in scope at the point of use. The expansion is a
 * fully parenthesized assignment expression, so it behaves the same whether it
 * is used as a statement or inside a larger expression. */
#define opj_t1_setcurctx(curctx, ctxno) \
    ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)])
66 | | |
/* Macros to deal with signed integers stored with just the MSB set for
 * negative values (smr = signed magnitude representation). */

/* Magnitude of an SMR value: the low 31 bits. */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)

/* Sign of an SMR value: 1 when the MSB is set, 0 otherwise. */
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)

/* Convert a two's-complement integer to SMR.
 * The negation is done in unsigned arithmetic on the parenthesized argument:
 * expression arguments (e.g. `a - b`) are negated as a whole, `opj_to_smr(-y)`
 * no longer expands to a `--y` decrement, and the most negative value does not
 * trigger signed overflow. */
#define opj_to_smr(x) \
    ((x) >= 0 ? (OPJ_UINT32)(x) : ((0U - (OPJ_UINT32)(x)) | 0x80000000U))
72 | | |
73 | | |
74 | | /** @name Local static functions */ |
75 | | /*@{*/ |
76 | | |
77 | | static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f); |
78 | | static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); |
79 | | static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); |
80 | | static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); |
81 | | static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, |
82 | | OPJ_UINT32 s, OPJ_UINT32 stride, |
83 | | OPJ_UINT32 vsc); |
84 | | |
85 | | |
86 | | /** |
87 | | Decode significant pass |
88 | | */ |
89 | | |
90 | | static INLINE void opj_t1_dec_sigpass_step_raw( |
91 | | opj_t1_t *t1, |
92 | | opj_flag_t *flagsp, |
93 | | OPJ_INT32 *datap, |
94 | | OPJ_INT32 oneplushalf, |
95 | | OPJ_UINT32 vsc, |
96 | | OPJ_UINT32 row); |
97 | | static INLINE void opj_t1_dec_sigpass_step_mqc( |
98 | | opj_t1_t *t1, |
99 | | opj_flag_t *flagsp, |
100 | | OPJ_INT32 *datap, |
101 | | OPJ_INT32 oneplushalf, |
102 | | OPJ_UINT32 row, |
103 | | OPJ_UINT32 flags_stride, |
104 | | OPJ_UINT32 vsc); |
105 | | |
106 | | /** |
107 | | Encode significant pass |
108 | | */ |
109 | | static void opj_t1_enc_sigpass(opj_t1_t *t1, |
110 | | OPJ_INT32 bpno, |
111 | | OPJ_INT32 *nmsedec, |
112 | | OPJ_BYTE type, |
113 | | OPJ_UINT32 cblksty); |
114 | | |
115 | | /** |
116 | | Decode significant pass |
117 | | */ |
118 | | static void opj_t1_dec_sigpass_raw( |
119 | | opj_t1_t *t1, |
120 | | OPJ_INT32 bpno, |
121 | | OPJ_INT32 cblksty); |
122 | | |
123 | | /** |
124 | | Encode refinement pass |
125 | | */ |
126 | | static void opj_t1_enc_refpass(opj_t1_t *t1, |
127 | | OPJ_INT32 bpno, |
128 | | OPJ_INT32 *nmsedec, |
129 | | OPJ_BYTE type); |
130 | | |
131 | | /** |
132 | | Decode refinement pass |
133 | | */ |
134 | | static void opj_t1_dec_refpass_raw( |
135 | | opj_t1_t *t1, |
136 | | OPJ_INT32 bpno); |
137 | | |
138 | | |
139 | | /** |
140 | | Decode refinement pass |
141 | | */ |
142 | | |
143 | | static INLINE void opj_t1_dec_refpass_step_raw( |
144 | | opj_t1_t *t1, |
145 | | opj_flag_t *flagsp, |
146 | | OPJ_INT32 *datap, |
147 | | OPJ_INT32 poshalf, |
148 | | OPJ_UINT32 row); |
149 | | static INLINE void opj_t1_dec_refpass_step_mqc( |
150 | | opj_t1_t *t1, |
151 | | opj_flag_t *flagsp, |
152 | | OPJ_INT32 *datap, |
153 | | OPJ_INT32 poshalf, |
154 | | OPJ_UINT32 row); |
155 | | |
156 | | |
157 | | /** |
158 | | Decode clean-up pass |
159 | | */ |
160 | | |
161 | | static void opj_t1_dec_clnpass_step( |
162 | | opj_t1_t *t1, |
163 | | opj_flag_t *flagsp, |
164 | | OPJ_INT32 *datap, |
165 | | OPJ_INT32 oneplushalf, |
166 | | OPJ_UINT32 row, |
167 | | OPJ_UINT32 vsc); |
168 | | |
169 | | /** |
170 | | Encode clean-up pass |
171 | | */ |
172 | | static void opj_t1_enc_clnpass( |
173 | | opj_t1_t *t1, |
174 | | OPJ_INT32 bpno, |
175 | | OPJ_INT32 *nmsedec, |
176 | | OPJ_UINT32 cblksty); |
177 | | |
178 | | static OPJ_FLOAT64 opj_t1_getwmsedec( |
179 | | OPJ_INT32 nmsedec, |
180 | | OPJ_UINT32 compno, |
181 | | OPJ_UINT32 level, |
182 | | OPJ_UINT32 orient, |
183 | | OPJ_INT32 bpno, |
184 | | OPJ_UINT32 qmfbid, |
185 | | OPJ_FLOAT64 stepsize, |
186 | | OPJ_UINT32 numcomps, |
187 | | const OPJ_FLOAT64 * mct_norms, |
188 | | OPJ_UINT32 mct_numcomps); |
189 | | |
190 | | /** Return "cumwmsedec" that should be used to increase tile->distotile */ |
191 | | static double opj_t1_encode_cblk(opj_t1_t *t1, |
192 | | opj_tcd_cblk_enc_t* cblk, |
193 | | OPJ_UINT32 orient, |
194 | | OPJ_UINT32 compno, |
195 | | OPJ_UINT32 level, |
196 | | OPJ_UINT32 qmfbid, |
197 | | OPJ_FLOAT64 stepsize, |
198 | | OPJ_UINT32 cblksty, |
199 | | OPJ_UINT32 numcomps, |
200 | | const OPJ_FLOAT64 * mct_norms, |
201 | | OPJ_UINT32 mct_numcomps); |
202 | | |
203 | | /** |
204 | | Decode 1 code-block |
205 | | @param t1 T1 handle |
206 | | @param cblk Code-block coding parameters |
207 | | @param orient |
208 | | @param roishift Region of interest shifting value |
209 | | @param cblksty Code-block style |
210 | | @param p_manager the event manager |
211 | | @param p_manager_mutex mutex for the event manager |
212 | | @param check_pterm whether PTERM correct termination should be checked |
213 | | */ |
214 | | static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, |
215 | | opj_tcd_cblk_dec_t* cblk, |
216 | | OPJ_UINT32 orient, |
217 | | OPJ_UINT32 roishift, |
218 | | OPJ_UINT32 cblksty, |
219 | | opj_event_mgr_t *p_manager, |
220 | | opj_mutex_t* p_manager_mutex, |
221 | | OPJ_BOOL check_pterm); |
222 | | |
223 | | /** |
224 | | Decode 1 HT code-block |
225 | | @param t1 T1 handle |
226 | | @param cblk Code-block coding parameters |
227 | | @param orient |
228 | | @param roishift Region of interest shifting value |
229 | | @param cblksty Code-block style |
230 | | @param p_manager the event manager |
231 | | @param p_manager_mutex mutex for the event manager |
232 | | @param check_pterm whether PTERM correct termination should be checked |
233 | | */ |
234 | | OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1, |
235 | | opj_tcd_cblk_dec_t* cblk, |
236 | | OPJ_UINT32 orient, |
237 | | OPJ_UINT32 roishift, |
238 | | OPJ_UINT32 cblksty, |
239 | | opj_event_mgr_t *p_manager, |
240 | | opj_mutex_t* p_manager_mutex, |
241 | | OPJ_BOOL check_pterm); |
242 | | |
243 | | |
244 | | static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1, |
245 | | OPJ_UINT32 w, |
246 | | OPJ_UINT32 h); |
247 | | |
248 | | /*@}*/ |
249 | | |
250 | | /*@}*/ |
251 | | |
252 | | /* ----------------------------------------------------------------------- */ |
253 | | |
254 | | static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) |
255 | 810M | { |
256 | 810M | return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)]; |
257 | 810M | } |
258 | | |
259 | | static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX, |
260 | | OPJ_UINT32 pfX, |
261 | | OPJ_UINT32 nfX, |
262 | | OPJ_UINT32 ci) |
263 | 365M | { |
264 | | /* |
265 | | 0 pfX T1_CHI_THIS T1_LUT_SGN_W |
266 | | 1 tfX T1_SIGMA_1 T1_LUT_SIG_N |
267 | | 2 nfX T1_CHI_THIS T1_LUT_SGN_E |
268 | | 3 tfX T1_SIGMA_3 T1_LUT_SIG_W |
269 | | 4 fX T1_CHI_(THIS - 1) T1_LUT_SGN_N |
270 | | 5 tfX T1_SIGMA_5 T1_LUT_SIG_E |
271 | | 6 fX T1_CHI_(THIS + 1) T1_LUT_SGN_S |
272 | | 7 tfX T1_SIGMA_7 T1_LUT_SIG_S |
273 | | */ |
274 | | |
275 | 365M | OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | |
276 | 365M | T1_SIGMA_7); |
277 | | |
278 | 365M | lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0); |
279 | 365M | lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2); |
280 | 365M | if (ci == 0U) { |
281 | 91.8M | lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4); |
282 | 273M | } else { |
283 | 273M | lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4); |
284 | 273M | } |
285 | 365M | lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6); |
286 | 365M | return lu; |
287 | 365M | } |
288 | | |
289 | | static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu) |
290 | 365M | { |
291 | 365M | return lut_ctxno_sc[lu]; |
292 | 365M | } |
293 | | |
294 | | static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f) |
295 | 907M | { |
296 | 907M | OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; |
297 | 907M | OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp; |
298 | 907M | return tmp2; |
299 | 907M | } |
300 | | |
301 | | static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu) |
302 | 365M | { |
303 | 365M | return lut_spb[lu]; |
304 | 365M | } |
305 | | |
306 | | static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos) |
307 | 0 | { |
308 | 0 | if (bitpos > 0) { |
309 | 0 | return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)]; |
310 | 0 | } |
311 | | |
312 | 0 | return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; |
313 | 0 | } |
314 | | |
315 | | static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) |
316 | 0 | { |
317 | 0 | if (bitpos > 0) { |
318 | 0 | return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)]; |
319 | 0 | } |
320 | | |
321 | 0 | return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; |
322 | 0 | } |
323 | | |
/*
 * Record that sample `ci` (0..3) of the flags word at `flagsp` has become
 * significant with sign bit `s`.
 *   - ORs T1_SIGMA_5 << (3 * ci) into flagsp[-1] and T1_SIGMA_3 << (3 * ci)
 *     into flagsp[1];
 *   - ORs the sign (s << T1_CHI_1_I) and T1_SIGMA_4, both shifted by 3 * ci,
 *     into `flags`, the lvalue holding the current word (callers pass either
 *     *flagsp or a local copy of it);
 *   - when ci == 0 and `vsc` is zero, updates the word `stride` entries before
 *     flagsp and the words on either side of it;
 *   - when ci == 3, updates the word `stride` entries after flagsp and the
 *     words on either side of it.
 * Arguments are pasted into the expansion without parentheses and several are
 * evaluated more than once, so pass simple, side-effect-free expressions.
 */
324 | 368M | #define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
325 | 368M | { \
326 | 368M | /* east */ \
327 | 368M | flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
328 | 368M | \
329 | 368M | /* mark target as significant */ \
330 | 368M | flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
331 | 368M | \
332 | 368M | /* west */ \
333 | 368M | flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
334 | 368M | \
335 | 368M | /* north-west, north, north-east */ \
336 | 368M | if (ci == 0U && !(vsc)) { \
337 | 41.8M | opj_flag_t* north = flagsp - (stride); \
338 | 41.8M | *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
339 | 41.8M | north[-1] |= T1_SIGMA_17; \
340 | 41.8M | north[1] |= T1_SIGMA_15; \
341 | 41.8M | } \
342 | 368M | \
343 | 368M | /* south-west, south, south-east */ \
344 | 368M | if (ci == 3U) { \
345 | 90.4M | opj_flag_t* south = flagsp + (stride); \
346 | 90.4M | *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
347 | 90.4M | south[-1] |= T1_SIGMA_2; \
348 | 90.4M | south[1] |= T1_SIGMA_0; \
349 | 90.4M | } \
350 | 368M | }
351 | | |
352 | | |
/*
 * Function form of opj_t1_update_flags_macro: marks sample `ci` of *flagsp as
 * significant with sign `s`, writing the update for the current word straight
 * into *flagsp. `stride` is the distance, in flags words, to the word above or
 * below; a non-zero `vsc` suppresses the update of the words above.
 */
353 | | static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
354 | | OPJ_UINT32 s, OPJ_UINT32 stride,
355 | | OPJ_UINT32 vsc)
356 | 3.40M | {
357 | 3.40M | opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
358 | 3.40M | }
359 | | |
360 | | /** |
361 | | Encode significant pass |
362 | | */ |
/*
 * Significance-pass coding step for one sample (encoder side).
 * Acts only when sample `ciIn` of the flags word *flagspIn has neither
 * T1_SIGMA_THIS nor T1_PI_THIS set and has at least one T1_SIGMA_NEIGHBOURS
 * bit set. In that case it:
 *   - codes whether bit `one` of the magnitude of *datapIn is set, using the
 *     zero-coding context (bypass coder when type == T1_TYPE_RAW, MQ coder
 *     otherwise);
 *   - if the bit is set, adds the distortion estimate to *nmsedec, codes the
 *     sign (raw in bypass mode, XORed with the sign-prediction bit otherwise)
 *     and calls opj_t1_update_flags();
 *   - sets T1_PI_THIS for the sample.
 * Expects a variable named `t1` in the caller's scope (t1->w + 2 is used as the
 * flags stride). mqc, curctx, a, c and ct are forwarded to the MQ coder macros.
 */
363 | 0 | #define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
364 | 0 | { \
365 | 0 | OPJ_UINT32 v; \
366 | 0 | const OPJ_UINT32 ci = (ciIn); \
367 | 0 | const OPJ_UINT32 vsc = (vscIn); \
368 | 0 | const OPJ_INT32* l_datap = (datapIn); \
369 | 0 | opj_flag_t* flagsp = (flagspIn); \
370 | 0 | OPJ_UINT32 const flags = *flagsp; \
371 | 0 | if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
372 | 0 | (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
373 | 0 | OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
374 | 0 | v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
375 | 0 | /* #ifdef DEBUG_ENC_SIG */ \
376 | 0 | /* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \
377 | 0 | /* #endif */ \
378 | 0 | opj_t1_setcurctx(curctx, ctxt1); \
379 | 0 | if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
380 | 0 | opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
381 | 0 | } else { \
382 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
383 | 0 | } \
384 | 0 | if (v) { \
385 | 0 | OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
386 | 0 | *flagsp, \
387 | 0 | flagsp[-1], flagsp[1], \
388 | 0 | ci); \
389 | 0 | OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
390 | 0 | v = opj_smr_sign(*l_datap); \
391 | 0 | *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
392 | 0 | (OPJ_UINT32)bpno); \
393 | 0 | /* #ifdef DEBUG_ENC_SIG */ \
394 | 0 | /* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \
395 | 0 | /* #endif */ \
396 | 0 | opj_t1_setcurctx(curctx, ctxt2); \
397 | 0 | if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
398 | 0 | opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
399 | 0 | } else { \
400 | 0 | OPJ_UINT32 spb = opj_t1_getspb(lu); \
401 | 0 | /* #ifdef DEBUG_ENC_SIG */ \
402 | 0 | /* fprintf(stderr, " spb=%d\n", spb); */ \
403 | 0 | /* #endif */ \
404 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
405 | 0 | } \
406 | 0 | opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
407 | 0 | } \
408 | 0 | *flagsp |= T1_PI_THIS << (ci * 3U); \
409 | 0 | } \
410 | 0 | }
411 | | |
412 | | static INLINE void opj_t1_dec_sigpass_step_raw( |
413 | | opj_t1_t *t1, |
414 | | opj_flag_t *flagsp, |
415 | | OPJ_INT32 *datap, |
416 | | OPJ_INT32 oneplushalf, |
417 | | OPJ_UINT32 vsc, |
418 | | OPJ_UINT32 ci) |
419 | 48.6M | { |
420 | 48.6M | OPJ_UINT32 v; |
421 | 48.6M | opj_mqc_t *mqc = &(t1->mqc); /* RAW component */ |
422 | | |
423 | 48.6M | OPJ_UINT32 const flags = *flagsp; |
424 | | |
425 | 48.6M | if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && |
426 | 48.6M | (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { |
427 | 4.31M | if (opj_mqc_raw_decode(mqc)) { |
428 | 3.40M | v = opj_mqc_raw_decode(mqc); |
429 | 3.40M | *datap = v ? -oneplushalf : oneplushalf; |
430 | 3.40M | opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); |
431 | 3.40M | } |
432 | 4.31M | *flagsp |= T1_PI_THIS << (ci * 3U); |
433 | 4.31M | } |
434 | 48.6M | } |
435 | | |
/*
 * Significance-pass decoding step (MQ coder) for sample `ci` of a flags word.
 * `flags` is an lvalue holding the current word and is updated in place;
 * `flagsp` points at that word inside the flags array so that the
 * neighbouring words can be read and updated.
 * If the sample has neither T1_SIGMA_THIS nor T1_PI_THIS set and has at least
 * one T1_SIGMA_NEIGHBOURS bit set:
 *   - a bit is decoded with the zero-coding context;
 *   - if it is 1, the sign is decoded with the sign context and XORed with the
 *     sign-prediction bit, data[ci * data_stride] is set to -oneplushalf or
 *     oneplushalf, and opj_t1_update_flags_macro is applied;
 *   - T1_PI_THIS is set for the sample.
 * `v` is scratch storage for decoded bits; mqc, curctx, a, c and ct are
 * forwarded to opj_mqc_decode_macro.
 */
436 | | #define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
437 | | data_stride, ci, mqc, curctx, \
438 | 1.36G | v, a, c, ct, oneplushalf, vsc) \
439 | 1.36G | { \
440 | 1.36G | if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
441 | 1.36G | (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
442 | 372M | OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
443 | 372M | opj_t1_setcurctx(curctx, ctxt1); \
444 | 372M | opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
445 | 372M | if (v) { \
446 | 171M | OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
447 | 171M | flags, \
448 | 171M | flagsp[-1], flagsp[1], \
449 | 171M | ci); \
450 | 171M | OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
451 | 171M | OPJ_UINT32 spb = opj_t1_getspb(lu); \
452 | 171M | opj_t1_setcurctx(curctx, ctxt2); \
453 | 171M | opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
454 | 171M | v = v ^ spb; \
455 | 171M | data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
456 | 171M | opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
457 | 171M | } \
458 | 372M | flags |= T1_PI_THIS << (ci * 3U); \
459 | 372M | } \
460 | 1.36G | }
461 | | |
/*
 * Function form of opj_t1_dec_sigpass_step_mqc_macro. It works directly on
 * *flagsp and on the coder state stored in t1->mqc (curctx, a, c, ct).
 * A data stride of 0 is passed to the macro, so the decoded coefficient is
 * written to *datap whatever the value of `ci`.
 */
462 | | static INLINE void opj_t1_dec_sigpass_step_mqc(
463 | | opj_t1_t *t1,
464 | | opj_flag_t *flagsp,
465 | | OPJ_INT32 *datap,
466 | | OPJ_INT32 oneplushalf,
467 | | OPJ_UINT32 ci,
468 | | OPJ_UINT32 flags_stride,
469 | | OPJ_UINT32 vsc)
470 | 19.2M | {
471 | 19.2M | OPJ_UINT32 v;
472 | |
473 | 19.2M | opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
474 | 19.2M | opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
475 | 19.2M | 0, ci, mqc, mqc->curctx,
476 | 19.2M | v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
477 | 19.2M | }
478 | | |
/*
 * Encode the significance pass of one bit-plane of the code-block held in t1.
 *   t1      - T1 handle (data, flags, dimensions and MQ coder)
 *   bpno    - bit-plane number; the tested bit is
 *             1 << (bpno + T1_NMSEDEC_FRACBITS)
 *   nmsedec - output: reset to 0, then accumulates the distortion estimate of
 *             every sample that becomes significant
 *   type    - T1_TYPE_RAW selects the bypass coder, anything else the MQ coder
 *   cblksty - code-block style; only J2K_CCP_CBLKSTY_VSC is examined here
 * The MQ coder state is copied into locals by DOWNLOAD_MQC_VARIABLES and
 * written back by UPLOAD_MQC_VARIABLES at the end.
 */
479 | | static void opj_t1_enc_sigpass(opj_t1_t *t1,
480 | | OPJ_INT32 bpno,
481 | | OPJ_INT32 *nmsedec,
482 | | OPJ_BYTE type,
483 | | OPJ_UINT32 cblksty
484 | | )
485 | 0 | {
486 | 0 | OPJ_UINT32 i, k;
487 | 0 | OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
488 | 0 | opj_flag_t* f = &T1_FLAGS(0, 0);
489 | 0 | OPJ_UINT32 const extra = 2;
490 | 0 | opj_mqc_t* mqc = &(t1->mqc);
491 | 0 | DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
492 | 0 | const OPJ_INT32* datap = t1->data;
493 |

494 | 0 | *nmsedec = 0;
495 | | #ifdef DEBUG_ENC_SIG
496 | | fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
497 | | #endif
/* Full stripes of 4 rows: one flags word and 4 consecutive data entries per
 * column; `extra` skips the two border words at the end of each flags row. */
498 | 0 | for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
499 | 0 | const OPJ_UINT32 w = t1->w;
500 | | #ifdef DEBUG_ENC_SIG
501 | | fprintf(stderr, " k=%d\n", k);
502 | | #endif
503 | 0 | for (i = 0; i < w; ++i, ++f, datap += 4) {
504 | | #ifdef DEBUG_ENC_SIG
505 | | fprintf(stderr, " i=%d\n", i);
506 | | #endif
507 | 0 | if (*f == 0U) {
508 | | /* Nothing to do for any of the 4 data points */
509 | 0 | continue;
510 | 0 | }
511 | 0 | opj_t1_enc_sigpass_step_macro(
512 | 0 | mqc, curctx, a, c, ct,
513 | 0 | f,
514 | 0 | &datap[0],
515 | 0 | bpno,
516 | 0 | one,
517 | 0 | nmsedec,
518 | 0 | type,
519 | 0 | 0, cblksty & J2K_CCP_CBLKSTY_VSC);
520 | 0 | opj_t1_enc_sigpass_step_macro(
521 | 0 | mqc, curctx, a, c, ct,
522 | 0 | f,
523 | 0 | &datap[1],
524 | 0 | bpno,
525 | 0 | one,
526 | 0 | nmsedec,
527 | 0 | type,
528 | 0 | 1, 0);
529 | 0 | opj_t1_enc_sigpass_step_macro(
530 | 0 | mqc, curctx, a, c, ct,
531 | 0 | f,
532 | 0 | &datap[2],
533 | 0 | bpno,
534 | 0 | one,
535 | 0 | nmsedec,
536 | 0 | type,
537 | 0 | 2, 0);
538 | 0 | opj_t1_enc_sigpass_step_macro(
539 | 0 | mqc, curctx, a, c, ct,
540 | 0 | f,
541 | 0 | &datap[3],
542 | 0 | bpno,
543 | 0 | one,
544 | 0 | nmsedec,
545 | 0 | type,
546 | 0 | 3, 0);
547 | 0 | }
548 | 0 | }
549 |

/* Trailing partial stripe: t1->h - k data entries per column. */
550 | 0 | if (k < t1->h) {
551 | 0 | OPJ_UINT32 j;
552 | | #ifdef DEBUG_ENC_SIG
553 | | fprintf(stderr, " k=%d\n", k);
554 | | #endif
555 | 0 | for (i = 0; i < t1->w; ++i, ++f) {
556 | | #ifdef DEBUG_ENC_SIG
557 | | fprintf(stderr, " i=%d\n", i);
558 | | #endif
559 | 0 | if (*f == 0U) {
560 | | /* Nothing to do for any of the 4 data points */
561 | 0 | datap += (t1->h - k);
562 | 0 | continue;
563 | 0 | }
564 | 0 | for (j = k; j < t1->h; ++j, ++datap) {
565 | 0 | opj_t1_enc_sigpass_step_macro(
566 | 0 | mqc, curctx, a, c, ct,
567 | 0 | f,
568 | 0 | &datap[0],
569 | 0 | bpno,
570 | 0 | one,
571 | 0 | nmsedec,
572 | 0 | type,
573 | 0 | j - k,
574 | 0 | (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
575 | 0 | }
576 | 0 | }
577 | 0 | }
578 |

579 | 0 | UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
580 | 0 | }
581 | | |
582 | | static void opj_t1_dec_sigpass_raw( |
583 | | opj_t1_t *t1, |
584 | | OPJ_INT32 bpno, |
585 | | OPJ_INT32 cblksty) |
586 | 37.9k | { |
587 | 37.9k | OPJ_INT32 one, half, oneplushalf; |
588 | 37.9k | OPJ_UINT32 i, j, k; |
589 | 37.9k | OPJ_INT32 *data = t1->data; |
590 | 37.9k | opj_flag_t *flagsp = &T1_FLAGS(0, 0); |
591 | 37.9k | const OPJ_UINT32 l_w = t1->w; |
592 | 37.9k | one = 1 << bpno; |
593 | 37.9k | half = one >> 1; |
594 | 37.9k | oneplushalf = one | half; |
595 | | |
596 | 325k | for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) { |
597 | 14.0M | for (i = 0; i < l_w; ++i, ++flagsp, ++data) { |
598 | 13.7M | opj_flag_t flags = *flagsp; |
599 | 13.7M | if (flags != 0) { |
600 | 11.4M | opj_t1_dec_sigpass_step_raw( |
601 | 11.4M | t1, |
602 | 11.4M | flagsp, |
603 | 11.4M | data, |
604 | 11.4M | oneplushalf, |
605 | 11.4M | cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ |
606 | 11.4M | 0U); |
607 | 11.4M | opj_t1_dec_sigpass_step_raw( |
608 | 11.4M | t1, |
609 | 11.4M | flagsp, |
610 | 11.4M | data + l_w, |
611 | 11.4M | oneplushalf, |
612 | 11.4M | OPJ_FALSE, /* vsc */ |
613 | 11.4M | 1U); |
614 | 11.4M | opj_t1_dec_sigpass_step_raw( |
615 | 11.4M | t1, |
616 | 11.4M | flagsp, |
617 | 11.4M | data + 2 * l_w, |
618 | 11.4M | oneplushalf, |
619 | 11.4M | OPJ_FALSE, /* vsc */ |
620 | 11.4M | 2U); |
621 | 11.4M | opj_t1_dec_sigpass_step_raw( |
622 | 11.4M | t1, |
623 | 11.4M | flagsp, |
624 | 11.4M | data + 3 * l_w, |
625 | 11.4M | oneplushalf, |
626 | 11.4M | OPJ_FALSE, /* vsc */ |
627 | 11.4M | 3U); |
628 | 11.4M | } |
629 | 13.7M | } |
630 | 287k | } |
631 | 37.9k | if (k < t1->h) { |
632 | 1.07M | for (i = 0; i < l_w; ++i, ++flagsp, ++data) { |
633 | 4.03M | for (j = 0; j < t1->h - k; ++j) { |
634 | 2.95M | opj_t1_dec_sigpass_step_raw( |
635 | 2.95M | t1, |
636 | 2.95M | flagsp, |
637 | 2.95M | data + j * l_w, |
638 | 2.95M | oneplushalf, |
639 | 2.95M | cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ |
640 | 2.95M | j); |
641 | 2.95M | } |
642 | 1.07M | } |
643 | 2.37k | } |
644 | 37.9k | } |
645 | | |
/*
 * Body shared by the opj_t1_dec_sigpass_mqc_* functions: decodes the
 * significance pass of bit-plane `bpno` with the MQ coder.
 * `w`, `h` and `flags_stride` are supplied by the instantiating function,
 * either as the constants 64, 64, 66 or as t1->w, t1->h, t1->w + 2U.
 * Full stripes of 4 rows work on a local copy of each flags word and on the
 * MQ coder state held in locals (DOWNLOAD_MQC_VARIABLES). That state is
 * written back (UPLOAD_MQC_VARIABLES) before the trailing partial stripe,
 * which goes through opj_t1_dec_sigpass_step_mqc() and therefore uses t1->mqc
 * directly. Data is addressed row by row with a row stride of l_w.
 */
646 | 815k | #define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
647 | 815k | { \
648 | 815k | OPJ_INT32 one, half, oneplushalf; \
649 | 815k | OPJ_UINT32 i, j, k; \
650 | 815k | register OPJ_INT32 *data = t1->data; \
651 | 815k | register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
652 | 815k | const OPJ_UINT32 l_w = w; \
653 | 815k | opj_mqc_t* mqc = &(t1->mqc); \
654 | 815k | DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
655 | 815k | register OPJ_UINT32 v; \
656 | 815k | one = 1 << bpno; \
657 | 815k | half = one >> 1; \
658 | 815k | oneplushalf = one | half; \
659 | 10.0M | for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
660 | 462M | for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
661 | 452M | opj_flag_t flags = *flagsp; \
662 | 452M | if( flags != 0 ) { \
663 | 336M | opj_t1_dec_sigpass_step_mqc_macro( \
664 | 336M | flags, flagsp, flags_stride, data, \
665 | 336M | l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
666 | 336M | opj_t1_dec_sigpass_step_mqc_macro( \
667 | 336M | flags, flagsp, flags_stride, data, \
668 | 336M | l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
669 | 336M | opj_t1_dec_sigpass_step_mqc_macro( \
670 | 336M | flags, flagsp, flags_stride, data, \
671 | 336M | l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
672 | 336M | opj_t1_dec_sigpass_step_mqc_macro( \
673 | 336M | flags, flagsp, flags_stride, data, \
674 | 336M | l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
675 | 336M | *flagsp = flags; \
676 | 336M | } \
677 | 452M | } \
678 | 9.28M | } \
679 | 815k | UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
680 | 815k | if( k < h ) { \
681 | 7.56M | for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
682 | 26.6M | for (j = 0; j < h - k; ++j) { \
683 | 19.2M | opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
684 | 19.2M | data + j * l_w, oneplushalf, j, flags_stride, vsc); \
685 | 19.2M | } \
686 | 7.48M | } \
687 | 76.0k | } \
688 | 815k | }
689 | | |
/* MQ-coder significance-pass decoding instantiated with constant dimensions
 * (width 64, height 64, flags stride 66) and the vsc argument fixed to
 * OPJ_FALSE. */
690 | | static void opj_t1_dec_sigpass_mqc_64x64_novsc(
691 | | opj_t1_t *t1,
692 | | OPJ_INT32 bpno)
693 | 89.8k | {
694 | 89.8k | opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
695 | 89.8k | }
696 | | |
/* MQ-coder significance-pass decoding instantiated with constant dimensions
 * (width 64, height 64, flags stride 66) and the vsc argument fixed to
 * OPJ_TRUE. */
697 | | static void opj_t1_dec_sigpass_mqc_64x64_vsc(
698 | | opj_t1_t *t1,
699 | | OPJ_INT32 bpno)
700 | 159k | {
701 | 159k | opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
702 | 159k | }
703 | | |
/* MQ-coder significance-pass decoding for any code-block size: dimensions are
 * read from t1 (t1->w, t1->h, flags stride t1->w + 2U) and the vsc argument is
 * fixed to OPJ_FALSE. */
704 | | static void opj_t1_dec_sigpass_mqc_generic_novsc(
705 | | opj_t1_t *t1,
706 | | OPJ_INT32 bpno)
707 | 201k | {
708 | 201k | opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
709 | 201k | t1->w + 2U);
710 | 201k | }
711 | | |
/* MQ-coder significance-pass decoding for any code-block size: dimensions are
 * read from t1 (t1->w, t1->h, flags stride t1->w + 2U) and the vsc argument is
 * fixed to OPJ_TRUE. */
712 | | static void opj_t1_dec_sigpass_mqc_generic_vsc(
713 | | opj_t1_t *t1,
714 | | OPJ_INT32 bpno)
715 | 364k | {
716 | 364k | opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
717 | 364k | t1->w + 2U);
718 | 364k | }
719 | | |
720 | | static void opj_t1_dec_sigpass_mqc( |
721 | | opj_t1_t *t1, |
722 | | OPJ_INT32 bpno, |
723 | | OPJ_INT32 cblksty) |
724 | 815k | { |
725 | 815k | if (t1->w == 64 && t1->h == 64) { |
726 | 249k | if (cblksty & J2K_CCP_CBLKSTY_VSC) { |
727 | 159k | opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno); |
728 | 159k | } else { |
729 | 89.8k | opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno); |
730 | 89.8k | } |
731 | 565k | } else { |
732 | 565k | if (cblksty & J2K_CCP_CBLKSTY_VSC) { |
733 | 364k | opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno); |
734 | 364k | } else { |
735 | 201k | opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno); |
736 | 201k | } |
737 | 565k | } |
738 | 815k | } |
739 | | |
740 | | /** |
741 | | Encode refinement pass step |
742 | | */ |
/*
 * Refinement-pass coding step for one sample (encoder side).
 * Acts only when sample `ci` of `flags` has T1_SIGMA_THIS set and T1_PI_THIS
 * clear. In that case it adds the refinement distortion estimate to *nmsedec,
 * codes bit `one` of the magnitude of *datap with the magnitude context
 * (bypass coder when type == T1_TYPE_RAW, MQ coder otherwise) and sets
 * T1_MU_THIS << (3 * ci) in `flagsUpdated`.
 * `flags` is only read; `flagsUpdated` is the lvalue that receives the change.
 * Callers pass either a separate local copy or the same lvalue for both.
 */
743 | 0 | #define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
744 | 0 | {\
745 | 0 | OPJ_UINT32 v; \
746 | 0 | if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
747 | 0 | const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
748 | 0 | OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
749 | 0 | OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
750 | 0 | *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
751 | 0 | (OPJ_UINT32)bpno); \
752 | 0 | v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
753 | 0 | /* #ifdef DEBUG_ENC_REF */ \
754 | 0 | /* fprintf(stderr, " ctxt=%d\n", ctxt); */ \
755 | 0 | /* #endif */ \
756 | 0 | opj_t1_setcurctx(curctx, ctxt); \
757 | 0 | if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
758 | 0 | opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
759 | 0 | } else { \
760 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
761 | 0 | } \
762 | 0 | flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
763 | 0 | } \
764 | 0 | }
765 | | |
766 | | |
767 | | static INLINE void opj_t1_dec_refpass_step_raw( |
768 | | opj_t1_t *t1, |
769 | | opj_flag_t *flagsp, |
770 | | OPJ_INT32 *datap, |
771 | | OPJ_INT32 poshalf, |
772 | | OPJ_UINT32 ci) |
773 | 43.5M | { |
774 | 43.5M | OPJ_UINT32 v; |
775 | | |
776 | 43.5M | opj_mqc_t *mqc = &(t1->mqc); /* RAW component */ |
777 | | |
778 | 43.5M | if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == |
779 | 43.5M | (T1_SIGMA_THIS << (ci * 3U))) { |
780 | 38.9M | v = opj_mqc_raw_decode(mqc); |
781 | 38.9M | *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; |
782 | 38.9M | *flagsp |= T1_MU_THIS << (ci * 3U); |
783 | 38.9M | } |
784 | 43.5M | } |
785 | | |
/*
 * Refinement-pass decoding step (MQ coder) for sample `ci` of a flags word.
 * `flags` is an lvalue holding the current word and is updated in place.
 * Acts only when the sample has T1_SIGMA_THIS set and T1_PI_THIS clear: a bit
 * is decoded with the magnitude context, poshalf is added to
 * data[ci * data_stride] when that bit differs from "the coefficient is
 * negative" and subtracted otherwise, and T1_MU_THIS is set for the sample.
 * `v` is scratch storage for the decoded bit; mqc, curctx, a, c and ct are
 * forwarded to opj_mqc_decode_macro.
 */
786 | | #define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
787 | 1.25G | mqc, curctx, v, a, c, ct, poshalf) \
788 | 1.25G | { \
789 | 1.25G | if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
790 | 1.25G | (T1_SIGMA_THIS << (ci * 3U))) { \
791 | 907M | OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
792 | 907M | opj_t1_setcurctx(curctx, ctxt); \
793 | 907M | opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
794 | 907M | data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
795 | 907M | flags |= T1_MU_THIS << (ci * 3U); \
796 | 907M | } \
797 | 1.25G | }
798 | | |
/*
 * Function form of opj_t1_dec_refpass_step_mqc_macro. It works directly on
 * *flagsp and on the coder state stored in t1->mqc (curctx, a, c, ct).
 * A data stride of 0 is passed to the macro, so the coefficient refined is
 * *datap whatever the value of `ci`.
 */
799 | | static INLINE void opj_t1_dec_refpass_step_mqc(
800 | | opj_t1_t *t1,
801 | | opj_flag_t *flagsp,
802 | | OPJ_INT32 *datap,
803 | | OPJ_INT32 poshalf,
804 | | OPJ_UINT32 ci)
805 | 17.0M | {
806 | 17.0M | OPJ_UINT32 v;
807 | |
808 | 17.0M | opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
809 | 17.0M | opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
810 | 17.0M | mqc, mqc->curctx, v, mqc->a, mqc->c,
811 | 17.0M | mqc->ct, poshalf);
812 | 17.0M | }
813 | | |
/*
 * Encode the refinement pass of one bit-plane of the code-block held in t1.
 *   t1      - T1 handle (data, flags, dimensions and MQ coder)
 *   bpno    - bit-plane number; the coded bit is
 *             1 << (bpno + T1_NMSEDEC_FRACBITS)
 *   nmsedec - output: reset to 0, then accumulates the distortion estimate of
 *             every refined sample
 *   type    - T1_TYPE_RAW selects the bypass coder, anything else the MQ coder
 * The MQ coder state is copied into locals by DOWNLOAD_MQC_VARIABLES and
 * written back by UPLOAD_MQC_VARIABLES at the end.
 */
814 | | static void opj_t1_enc_refpass(
815 | | opj_t1_t *t1,
816 | | OPJ_INT32 bpno,
817 | | OPJ_INT32 *nmsedec,
818 | | OPJ_BYTE type)
819 | 0 | {
820 | 0 | OPJ_UINT32 i, k;
821 | 0 | const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
822 | 0 | opj_flag_t* f = &T1_FLAGS(0, 0);
823 | 0 | const OPJ_UINT32 extra = 2U;
824 | 0 | opj_mqc_t* mqc = &(t1->mqc);
825 | 0 | DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
826 | 0 | const OPJ_INT32* datap = t1->data;
827 |

828 | 0 | *nmsedec = 0;
829 | | #ifdef DEBUG_ENC_REF
830 | | fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
831 | | #endif
/* Full stripes of 4 rows: one flags word and 4 consecutive data entries per
 * column. Each word is read once, updated in a local copy and stored back. */
832 | 0 | for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
833 | | #ifdef DEBUG_ENC_REF
834 | | fprintf(stderr, " k=%d\n", k);
835 | | #endif
836 | 0 | for (i = 0; i < t1->w; ++i, f++, datap += 4) {
837 | 0 | const OPJ_UINT32 flags = *f;
838 | 0 | OPJ_UINT32 flagsUpdated = flags;
839 | | #ifdef DEBUG_ENC_REF
840 | | fprintf(stderr, " i=%d\n", i);
841 | | #endif
842 | 0 | if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
843 | | /* none significant */
844 | 0 | continue;
845 | 0 | }
846 | 0 | if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
847 | 0 | (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
848 | | /* all processed by sigpass */
849 | 0 | continue;
850 | 0 | }
851 | |
852 | 0 | opj_t1_enc_refpass_step_macro(
853 | 0 | mqc, curctx, a, c, ct,
854 | 0 | flags, flagsUpdated,
855 | 0 | &datap[0],
856 | 0 | bpno,
857 | 0 | one,
858 | 0 | nmsedec,
859 | 0 | type,
860 | 0 | 0);
861 | 0 | opj_t1_enc_refpass_step_macro(
862 | 0 | mqc, curctx, a, c, ct,
863 | 0 | flags, flagsUpdated,
864 | 0 | &datap[1],
865 | 0 | bpno,
866 | 0 | one,
867 | 0 | nmsedec,
868 | 0 | type,
869 | 0 | 1);
870 | 0 | opj_t1_enc_refpass_step_macro(
871 | 0 | mqc, curctx, a, c, ct,
872 | 0 | flags, flagsUpdated,
873 | 0 | &datap[2],
874 | 0 | bpno,
875 | 0 | one,
876 | 0 | nmsedec,
877 | 0 | type,
878 | 0 | 2);
879 | 0 | opj_t1_enc_refpass_step_macro(
880 | 0 | mqc, curctx, a, c, ct,
881 | 0 | flags, flagsUpdated,
882 | 0 | &datap[3],
883 | 0 | bpno,
884 | 0 | one,
885 | 0 | nmsedec,
886 | 0 | type,
887 | 0 | 3);
888 | 0 | *f = flagsUpdated;
889 | 0 | }
890 | 0 | }
891 |

/* Trailing partial stripe: remaining_lines data entries per column; the flags
 * word is read and updated in place through *f. */
892 | 0 | if (k < t1->h) {
893 | 0 | OPJ_UINT32 j;
894 | 0 | const OPJ_UINT32 remaining_lines = t1->h - k;
895 | | #ifdef DEBUG_ENC_REF
896 | | fprintf(stderr, " k=%d\n", k);
897 | | #endif
898 | 0 | for (i = 0; i < t1->w; ++i, ++f) {
899 | | #ifdef DEBUG_ENC_REF
900 | | fprintf(stderr, " i=%d\n", i);
901 | | #endif
902 | 0 | if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
903 | | /* none significant */
904 | 0 | datap += remaining_lines;
905 | 0 | continue;
906 | 0 | }
907 | 0 | for (j = 0; j < remaining_lines; ++j, datap ++) {
908 | 0 | opj_t1_enc_refpass_step_macro(
909 | 0 | mqc, curctx, a, c, ct,
910 | 0 | *f, *f,
911 | 0 | &datap[0],
912 | 0 | bpno,
913 | 0 | one,
914 | 0 | nmsedec,
915 | 0 | type,
916 | 0 | j);
917 | 0 | }
918 | 0 | }
919 | 0 | }
920 |

921 | 0 | UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
922 | 0 | }
923 | | |
924 | | |
925 | | static void opj_t1_dec_refpass_raw( |
926 | | opj_t1_t *t1, |
927 | | OPJ_INT32 bpno) |
928 | 36.3k | { |
929 | 36.3k | OPJ_INT32 one, poshalf; |
930 | 36.3k | OPJ_UINT32 i, j, k; |
931 | 36.3k | OPJ_INT32 *data = t1->data; |
932 | 36.3k | opj_flag_t *flagsp = &T1_FLAGS(0, 0); |
933 | 36.3k | const OPJ_UINT32 l_w = t1->w; |
934 | 36.3k | one = 1 << bpno; |
935 | 36.3k | poshalf = one >> 1; |
936 | 321k | for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) { |
937 | 12.6M | for (i = 0; i < l_w; ++i, ++flagsp, ++data) { |
938 | 12.3M | opj_flag_t flags = *flagsp; |
939 | 12.3M | if (flags != 0) { |
940 | 10.2M | opj_t1_dec_refpass_step_raw( |
941 | 10.2M | t1, |
942 | 10.2M | flagsp, |
943 | 10.2M | data, |
944 | 10.2M | poshalf, |
945 | 10.2M | 0U); |
946 | 10.2M | opj_t1_dec_refpass_step_raw( |
947 | 10.2M | t1, |
948 | 10.2M | flagsp, |
949 | 10.2M | data + l_w, |
950 | 10.2M | poshalf, |
951 | 10.2M | 1U); |
952 | 10.2M | opj_t1_dec_refpass_step_raw( |
953 | 10.2M | t1, |
954 | 10.2M | flagsp, |
955 | 10.2M | data + 2 * l_w, |
956 | 10.2M | poshalf, |
957 | 10.2M | 2U); |
958 | 10.2M | opj_t1_dec_refpass_step_raw( |
959 | 10.2M | t1, |
960 | 10.2M | flagsp, |
961 | 10.2M | data + 3 * l_w, |
962 | 10.2M | poshalf, |
963 | 10.2M | 3U); |
964 | 10.2M | } |
965 | 12.3M | } |
966 | 284k | } |
967 | 36.3k | if (k < t1->h) { |
968 | 986k | for (i = 0; i < l_w; ++i, ++flagsp, ++data) { |
969 | 3.68M | for (j = 0; j < t1->h - k; ++j) { |
970 | 2.70M | opj_t1_dec_refpass_step_raw( |
971 | 2.70M | t1, |
972 | 2.70M | flagsp, |
973 | 2.70M | data + j * l_w, |
974 | 2.70M | poshalf, |
975 | 2.70M | j); |
976 | 2.70M | } |
977 | 984k | } |
978 | 2.28k | } |
979 | 36.3k | } |
980 | | |
/**
Common body of the MQ-coded refinement pass decoders.

Expands to a block that walks the flags/data buffers of t1 stripe by stripe.
For full four-row stripes the MQ coder state is kept in local variables
(DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES) and the step macro is applied
to the four rows of every non-zero flags word. The trailing partial stripe is
handled after the upload through opj_t1_dec_refpass_step_mqc().

Every macro argument is parenthesized where it is used in an expression so
that callers may pass arbitrary expressions.

@param t1            Tier-1 handle (opj_t1_t*)
@param bpno          Bit-plane being decoded
@param w             Code-block width
@param h             Code-block height
@param flags_stride  Number of flag entries per stripe row
*/
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
        OPJ_INT32 one, poshalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        poshalf = one >> 1; \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 0, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 1, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 2, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 3, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
                } \
            } \
        } \
}
1022 | | |
1023 | | static void opj_t1_dec_refpass_mqc_64x64( |
1024 | | opj_t1_t *t1, |
1025 | | OPJ_INT32 bpno) |
1026 | 223k | { |
1027 | 223k | opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66); |
1028 | 223k | } |
1029 | | |
1030 | | static void opj_t1_dec_refpass_mqc_generic( |
1031 | | opj_t1_t *t1, |
1032 | | OPJ_INT32 bpno) |
1033 | 537k | { |
1034 | 537k | opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U); |
1035 | 537k | } |
1036 | | |
1037 | | static void opj_t1_dec_refpass_mqc( |
1038 | | opj_t1_t *t1, |
1039 | | OPJ_INT32 bpno) |
1040 | 761k | { |
1041 | 761k | if (t1->w == 64 && t1->h == 64) { |
1042 | 223k | opj_t1_dec_refpass_mqc_64x64(t1, bpno); |
1043 | 537k | } else { |
1044 | 537k | opj_t1_dec_refpass_mqc_generic(t1, bpno); |
1045 | 537k | } |
1046 | 761k | } |
1047 | | |
/**
Encode clean-up pass step

Processes the samples ci = runlen .. lim-1 of one flags word (one stripe
column). If every sample of the column is already significant and was
processed in an earlier pass, only the PI bits of the samples from runlen
onward are cleared. Otherwise each sample is coded (zero-coding, then sign
coding when it becomes significant, or directly sign coding for the first
sample when agg is non-zero) and its PI bit is cleared.

Notes for callers:
- the expansion references an identifier named t1 that must be in scope at
  the call site (used for the flags stride t1->w + 2U);
- runlen, lim, agg, one, bpno and cblksty may be evaluated several times, so
  arguments must be free of side effects.

@param mqc,curctx,a,c,ct  MQ coder and its downloaded state variables
@param flagspIn  Pointer to the flags word of the column
@param datapIn   Pointer to the first coefficient to code (sample runlen)
@param bpno      Bit-plane being coded
@param one       Bit mask tested against the coefficient magnitude
@param nmsedec   Pointer to the distortion accumulator
@param agg       Non-zero when the column was coded in aggregation mode
@param runlen    Index of the first sample to code
@param lim       Number of samples in the column
@param cblksty   Code-block style flags
*/
#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
{ \
    OPJ_UINT32 v; \
    OPJ_UINT32 ci; \
    opj_flag_t* const flagsp = (flagspIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
                T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
    \
    if ((*flagsp & check) == check) { \
        if ((runlen) == 0) { \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } else if ((runlen) == 1) { \
            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } else if ((runlen) == 2) { \
            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
        } else if ((runlen) == 3) { \
            *flagsp &= ~(T1_PI_3); \
        } \
    } else { \
        for (ci = (runlen); ci < (lim); ++ci) { \
            OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
            if (((agg) != 0) && (ci == (runlen))) { \
                /* first sample after an aggregation run: known significant */ \
                goto_PARTIAL = OPJ_TRUE; \
            } \
            else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
                opj_t1_setcurctx(curctx, ctxt1); \
                v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)(one)) ? 1 : 0; \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
                if (v) { \
                    goto_PARTIAL = OPJ_TRUE; \
                } \
            } \
            if( goto_PARTIAL ) { \
                OPJ_UINT32 vsc; \
                OPJ_UINT32 ctxt2, spb; \
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                            *flagsp, \
                            flagsp[-1], flagsp[1], \
                            ci); \
                *(nmsedec) += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                                  (OPJ_UINT32)(bpno)); \
                ctxt2 = opj_t1_getctxno_sc(lu); \
                opj_t1_setcurctx(curctx, ctxt2); \
                \
                v = opj_smr_sign(*l_datap); \
                spb = opj_t1_getspb(lu); \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
                vsc = (((cblksty) & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
                opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
            } \
            *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
            l_datap ++; \
        } \
    } \
}
1117 | | |
/**
Decode clean-up pass step for sample ci of one flags word.

When check_flags is true the sample is skipped if it is already significant
or was processed in an earlier pass. Unless partial is true a zero-coding
decision is decoded first and the step stops when it is 0. Otherwise the sign
is decoded, the coefficient is set to +/- oneplushalf and the flags of the
sample and its neighbours are updated.

flags, flagsp, curctx, v, a, c and ct are forwarded to other macros and may be
written to, so they must be lvalues; the remaining arguments are parenthesized
wherever they appear in an expression.

@param check_flags   Whether to test the SIGMA/PI bits of the sample first
@param partial       Whether to skip the zero-coding decision
@param flags         Flags word of the column
@param flagsp        Pointer to that flags word
@param flags_stride  Number of flag entries per stripe row
@param data          Pointer to the coefficient of sample 0 of the column
@param data_stride   Distance between vertically adjacent coefficients
@param ci            Index of the sample inside the column
@param mqc,curctx,v,a,c,ct  MQ coder, its state variables and a scratch bit
@param oneplushalf   Magnitude assigned to a newly significant coefficient
@param vsc           Forwarded to the flags update macro
*/
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                      flags, flagsp, flags_stride, data, \
                                      data_stride, ci, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc) \
{ \
    if ( !(check_flags) || !((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U)))) {\
        do { \
            if( !(partial) ) { \
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
                opj_t1_setcurctx(curctx, ctxt1); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if( !v ) \
                    break; \
            } \
            { \
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                    flags, (flagsp)[-1], (flagsp)[1], \
                                    ci); \
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                v = v ^ opj_t1_getspb(lu); \
                (data)[(ci)*(data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
            } \
        } while(0); \
    } \
}
1145 | | |
1146 | | static void opj_t1_dec_clnpass_step( |
1147 | | opj_t1_t *t1, |
1148 | | opj_flag_t *flagsp, |
1149 | | OPJ_INT32 *datap, |
1150 | | OPJ_INT32 oneplushalf, |
1151 | | OPJ_UINT32 ci, |
1152 | | OPJ_UINT32 vsc) |
1153 | 27.0M | { |
1154 | 27.0M | OPJ_UINT32 v; |
1155 | | |
1156 | 27.0M | opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ |
1157 | 27.0M | opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, |
1158 | 27.0M | *flagsp, flagsp, t1->w + 2U, datap, |
1159 | 27.0M | 0, ci, mqc, mqc->curctx, |
1160 | 27.0M | v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc); |
1161 | 27.0M | } |
1162 | | |
1163 | | static void opj_t1_enc_clnpass( |
1164 | | opj_t1_t *t1, |
1165 | | OPJ_INT32 bpno, |
1166 | | OPJ_INT32 *nmsedec, |
1167 | | OPJ_UINT32 cblksty) |
1168 | 0 | { |
1169 | 0 | OPJ_UINT32 i, k; |
1170 | 0 | const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); |
1171 | 0 | opj_mqc_t* mqc = &(t1->mqc); |
1172 | 0 | DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); |
1173 | 0 | const OPJ_INT32* datap = t1->data; |
1174 | 0 | opj_flag_t *f = &T1_FLAGS(0, 0); |
1175 | 0 | const OPJ_UINT32 extra = 2U; |
1176 | |
|
1177 | 0 | *nmsedec = 0; |
1178 | | #ifdef DEBUG_ENC_CLN |
1179 | | printf("enc_clnpass: bpno=%d\n", bpno); |
1180 | | #endif |
1181 | 0 | for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { |
1182 | | #ifdef DEBUG_ENC_CLN |
1183 | | printf(" k=%d\n", k); |
1184 | | #endif |
1185 | 0 | for (i = 0; i < t1->w; ++i, f++) { |
1186 | 0 | OPJ_UINT32 agg, runlen; |
1187 | | #ifdef DEBUG_ENC_CLN |
1188 | | printf(" i=%d\n", i); |
1189 | | #endif |
1190 | 0 | agg = !*f; |
1191 | | #ifdef DEBUG_ENC_CLN |
1192 | | printf(" agg=%d\n", agg); |
1193 | | #endif |
1194 | 0 | if (agg) { |
1195 | 0 | for (runlen = 0; runlen < 4; ++runlen, ++datap) { |
1196 | 0 | if (opj_smr_abs(*datap) & (OPJ_UINT32)one) { |
1197 | 0 | break; |
1198 | 0 | } |
1199 | 0 | } |
1200 | 0 | opj_t1_setcurctx(curctx, T1_CTXNO_AGG); |
1201 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4); |
1202 | 0 | if (runlen == 4) { |
1203 | 0 | continue; |
1204 | 0 | } |
1205 | 0 | opj_t1_setcurctx(curctx, T1_CTXNO_UNI); |
1206 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1); |
1207 | 0 | opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1); |
1208 | 0 | } else { |
1209 | 0 | runlen = 0; |
1210 | 0 | } |
1211 | 0 | opj_t1_enc_clnpass_step_macro( |
1212 | 0 | mqc, curctx, a, c, ct, |
1213 | 0 | f, |
1214 | 0 | datap, |
1215 | 0 | bpno, |
1216 | 0 | one, |
1217 | 0 | nmsedec, |
1218 | 0 | agg, |
1219 | 0 | runlen, |
1220 | 0 | 4U, |
1221 | 0 | cblksty); |
1222 | 0 | datap += 4 - runlen; |
1223 | 0 | } |
1224 | 0 | } |
1225 | 0 | if (k < t1->h) { |
1226 | 0 | const OPJ_UINT32 agg = 0; |
1227 | 0 | const OPJ_UINT32 runlen = 0; |
1228 | | #ifdef DEBUG_ENC_CLN |
1229 | | printf(" k=%d\n", k); |
1230 | | #endif |
1231 | 0 | for (i = 0; i < t1->w; ++i, f++) { |
1232 | | #ifdef DEBUG_ENC_CLN |
1233 | | printf(" i=%d\n", i); |
1234 | | printf(" agg=%d\n", agg); |
1235 | | #endif |
1236 | 0 | opj_t1_enc_clnpass_step_macro( |
1237 | 0 | mqc, curctx, a, c, ct, |
1238 | 0 | f, |
1239 | 0 | datap, |
1240 | 0 | bpno, |
1241 | 0 | one, |
1242 | 0 | nmsedec, |
1243 | 0 | agg, |
1244 | 0 | runlen, |
1245 | 0 | t1->h - k, |
1246 | 0 | cblksty); |
1247 | 0 | datap += t1->h - k; |
1248 | 0 | } |
1249 | 0 | } |
1250 | |
|
1251 | 0 | UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); |
1252 | 0 | } |
1253 | | |
/**
Common body of the clean-up pass decoders.

Expands to a block that walks the flags/data buffers of t1 stripe by stripe.
In full four-row stripes, a column whose flags word is zero is decoded in
aggregation mode (one AGG decision, then two UNI decisions forming the index
of the first significant sample, then the step macro from that sample on);
any other column runs the step macro on its four samples. The PI bits of the
column are cleared when it has been processed. The MQ coder state lives in
local variables during this part and is uploaded before the trailing partial
stripe, which is decoded through opj_t1_dec_clnpass_step().

Every macro argument is parenthesized where it is used in an expression so
that callers may pass arbitrary expressions.

@param t1            Tier-1 handle (opj_t1_t*)
@param bpno          Bit-plane being decoded
@param vsc           Forwarded to the step macro/function for sample 0
@param w             Code-block width
@param h             Code-block height
@param flags_stride  Number of flag entries per stripe row
*/
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
    OPJ_INT32 one, half, oneplushalf; \
    OPJ_UINT32 runlen; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = (w); \
    opj_mqc_t* mqc = &((t1)->mqc); \
    register OPJ_INT32 *data = (t1)->data; \
    register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    register OPJ_UINT32 v; \
    one = 1 << (bpno); \
    half = one >> 1; \
    oneplushalf = one | half; \
    for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags == 0) { \
                OPJ_UINT32 partial = OPJ_TRUE; \
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if (!v) { \
                    continue; \
                } \
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                runlen = (runlen << 1) | v; \
                switch(runlen) { \
                    case 0: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 0, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, vsc); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 1: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 1, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 2: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 2, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 3: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 3, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        break; \
                } \
            } else { \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 0, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, vsc); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 1, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 2, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 3, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
            } \
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    if( k < (h) ) { \
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
            for (j = 0; j < (h) - k; ++j) { \
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
            } \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
}
1342 | | |
1343 | | static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty) |
1344 | 1.02M | { |
1345 | 1.02M | if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) { |
1346 | 774k | opj_mqc_t* mqc = &(t1->mqc); |
1347 | 774k | OPJ_UINT32 v, v2; |
1348 | 774k | opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); |
1349 | 774k | opj_mqc_decode(v, mqc); |
1350 | 774k | opj_mqc_decode(v2, mqc); |
1351 | 774k | v = (v << 1) | v2; |
1352 | 774k | opj_mqc_decode(v2, mqc); |
1353 | 774k | v = (v << 1) | v2; |
1354 | 774k | opj_mqc_decode(v2, mqc); |
1355 | 774k | v = (v << 1) | v2; |
1356 | | /* |
1357 | | if (v!=0xa) { |
1358 | | opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); |
1359 | | } |
1360 | | */ |
1361 | 774k | } |
1362 | 1.02M | } |
1363 | | |
1364 | | static void opj_t1_dec_clnpass_64x64_novsc( |
1365 | | opj_t1_t *t1, |
1366 | | OPJ_INT32 bpno) |
1367 | 132k | { |
1368 | 132k | opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66); |
1369 | 132k | } |
1370 | | |
1371 | | static void opj_t1_dec_clnpass_64x64_vsc( |
1372 | | opj_t1_t *t1, |
1373 | | OPJ_INT32 bpno) |
1374 | 200k | { |
1375 | 200k | opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66); |
1376 | 200k | } |
1377 | | |
1378 | | static void opj_t1_dec_clnpass_generic_novsc( |
1379 | | opj_t1_t *t1, |
1380 | | OPJ_INT32 bpno) |
1381 | 271k | { |
1382 | 271k | opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h, |
1383 | 271k | t1->w + 2U); |
1384 | 271k | } |
1385 | | |
1386 | | static void opj_t1_dec_clnpass_generic_vsc( |
1387 | | opj_t1_t *t1, |
1388 | | OPJ_INT32 bpno) |
1389 | 419k | { |
1390 | 419k | opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h, |
1391 | 419k | t1->w + 2U); |
1392 | 419k | } |
1393 | | |
1394 | | static void opj_t1_dec_clnpass( |
1395 | | opj_t1_t *t1, |
1396 | | OPJ_INT32 bpno, |
1397 | | OPJ_INT32 cblksty) |
1398 | 1.02M | { |
1399 | 1.02M | if (t1->w == 64 && t1->h == 64) { |
1400 | 333k | if (cblksty & J2K_CCP_CBLKSTY_VSC) { |
1401 | 200k | opj_t1_dec_clnpass_64x64_vsc(t1, bpno); |
1402 | 200k | } else { |
1403 | 132k | opj_t1_dec_clnpass_64x64_novsc(t1, bpno); |
1404 | 132k | } |
1405 | 690k | } else { |
1406 | 690k | if (cblksty & J2K_CCP_CBLKSTY_VSC) { |
1407 | 419k | opj_t1_dec_clnpass_generic_vsc(t1, bpno); |
1408 | 419k | } else { |
1409 | 271k | opj_t1_dec_clnpass_generic_novsc(t1, bpno); |
1410 | 271k | } |
1411 | 690k | } |
1412 | 1.02M | opj_t1_dec_clnpass_check_segsym(t1, cblksty); |
1413 | 1.02M | } |
1414 | | |
1415 | | |
1416 | | static OPJ_FLOAT64 opj_t1_getwmsedec( |
1417 | | OPJ_INT32 nmsedec, |
1418 | | OPJ_UINT32 compno, |
1419 | | OPJ_UINT32 level, |
1420 | | OPJ_UINT32 orient, |
1421 | | OPJ_INT32 bpno, |
1422 | | OPJ_UINT32 qmfbid, |
1423 | | OPJ_FLOAT64 stepsize, |
1424 | | OPJ_UINT32 numcomps, |
1425 | | const OPJ_FLOAT64 * mct_norms, |
1426 | | OPJ_UINT32 mct_numcomps) |
1427 | 0 | { |
1428 | 0 | OPJ_FLOAT64 w1 = 1, w2, wmsedec; |
1429 | 0 | OPJ_ARG_NOT_USED(numcomps); |
1430 | |
|
1431 | 0 | if (mct_norms && (compno < mct_numcomps)) { |
1432 | 0 | w1 = mct_norms[compno]; |
1433 | 0 | } |
1434 | |
|
1435 | 0 | if (qmfbid == 1) { |
1436 | 0 | w2 = opj_dwt_getnorm(level, orient); |
1437 | 0 | } else { /* if (qmfbid == 0) */ |
1438 | 0 | const OPJ_INT32 log2_gain = (orient == 0) ? 0 : |
1439 | 0 | (orient == 3) ? 2 : 1; |
1440 | 0 | w2 = opj_dwt_getnorm_real(level, orient); |
1441 | | /* Not sure this is right. But preserves past behaviour */ |
1442 | 0 | stepsize /= (1 << log2_gain); |
1443 | 0 | } |
1444 | |
|
1445 | 0 | wmsedec = w1 * w2 * stepsize * (1 << bpno); |
1446 | 0 | wmsedec *= wmsedec * nmsedec / 8192.0; |
1447 | |
|
1448 | 0 | return wmsedec; |
1449 | 0 | } |
1450 | | |
1451 | | static OPJ_BOOL opj_t1_allocate_buffers( |
1452 | | opj_t1_t *t1, |
1453 | | OPJ_UINT32 w, |
1454 | | OPJ_UINT32 h) |
1455 | 1.81M | { |
1456 | 1.81M | OPJ_UINT32 flagssize; |
1457 | 1.81M | OPJ_UINT32 flags_stride; |
1458 | | |
1459 | | /* No risk of overflow. Prior checks ensure those assert are met */ |
1460 | | /* They are per the specification */ |
1461 | 1.81M | assert(w <= 1024); |
1462 | 1.81M | assert(h <= 1024); |
1463 | 1.81M | assert(w * h <= 4096); |
1464 | | |
1465 | | /* encoder uses tile buffer, so no need to allocate */ |
1466 | 1.81M | { |
1467 | 1.81M | OPJ_UINT32 datasize = w * h; |
1468 | | |
1469 | 1.81M | if (datasize > t1->datasize) { |
1470 | 1.91k | opj_aligned_free(t1->data); |
1471 | 1.91k | t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32)); |
1472 | 1.91k | if (!t1->data) { |
1473 | | /* FIXME event manager error callback */ |
1474 | 0 | return OPJ_FALSE; |
1475 | 0 | } |
1476 | 1.91k | t1->datasize = datasize; |
1477 | 1.91k | } |
1478 | | /* memset first arg is declared to never be null by gcc */ |
1479 | 1.81M | if (t1->data != NULL) { |
1480 | 1.81M | memset(t1->data, 0, datasize * sizeof(OPJ_INT32)); |
1481 | 1.81M | } |
1482 | 1.81M | } |
1483 | | |
1484 | 0 | flags_stride = w + 2U; /* can't be 0U */ |
1485 | | |
1486 | 1.81M | flagssize = (h + 3U) / 4U + 2U; |
1487 | | |
1488 | 1.81M | flagssize *= flags_stride; |
1489 | 1.81M | { |
1490 | 1.81M | opj_flag_t* p; |
1491 | 1.81M | OPJ_UINT32 x; |
1492 | 1.81M | OPJ_UINT32 flags_height = (h + 3U) / 4U; |
1493 | | |
1494 | 1.81M | if (flagssize > t1->flagssize) { |
1495 | | |
1496 | 37.3k | opj_aligned_free(t1->flags); |
1497 | 37.3k | t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof( |
1498 | 37.3k | opj_flag_t)); |
1499 | 37.3k | if (!t1->flags) { |
1500 | | /* FIXME event manager error callback */ |
1501 | 0 | return OPJ_FALSE; |
1502 | 0 | } |
1503 | 37.3k | } |
1504 | 1.81M | t1->flagssize = flagssize; |
1505 | | |
1506 | 1.81M | memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); |
1507 | | |
1508 | 1.81M | p = &t1->flags[0]; |
1509 | 388M | for (x = 0; x < flags_stride; ++x) { |
1510 | | /* magic value to hopefully stop any passes being interested in this entry */ |
1511 | 386M | *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); |
1512 | 386M | } |
1513 | | |
1514 | 1.81M | p = &t1->flags[((flags_height + 1) * flags_stride)]; |
1515 | 388M | for (x = 0; x < flags_stride; ++x) { |
1516 | | /* magic value to hopefully stop any passes being interested in this entry */ |
1517 | 386M | *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); |
1518 | 386M | } |
1519 | | |
1520 | 1.81M | if (h % 4) { |
1521 | 69.1k | OPJ_UINT32 v = 0; |
1522 | 69.1k | p = &t1->flags[((flags_height) * flags_stride)]; |
1523 | 69.1k | if (h % 4 == 1) { |
1524 | 4.00k | v |= T1_PI_1 | T1_PI_2 | T1_PI_3; |
1525 | 65.1k | } else if (h % 4 == 2) { |
1526 | 27.0k | v |= T1_PI_2 | T1_PI_3; |
1527 | 38.1k | } else if (h % 4 == 3) { |
1528 | 38.1k | v |= T1_PI_3; |
1529 | 38.1k | } |
1530 | 9.02M | for (x = 0; x < flags_stride; ++x) { |
1531 | 8.95M | *p++ = v; |
1532 | 8.95M | } |
1533 | 69.1k | } |
1534 | 1.81M | } |
1535 | | |
1536 | 0 | t1->w = w; |
1537 | 1.81M | t1->h = h; |
1538 | | |
1539 | 1.81M | return OPJ_TRUE; |
1540 | 1.81M | } |
1541 | | |
1542 | | /* ----------------------------------------------------------------------- */ |
1543 | | |
1544 | | /* ----------------------------------------------------------------------- */ |
1545 | | /** |
1546 | | * Creates a new Tier 1 handle |
1547 | | * and initializes the look-up tables of the Tier-1 coder/decoder |
1548 | | * @return a new T1 handle if successful, returns NULL otherwise |
1549 | | */ |
1550 | | opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder) |
1551 | 938 | { |
1552 | 938 | opj_t1_t *l_t1 = 00; |
1553 | | |
1554 | 938 | l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t)); |
1555 | 938 | if (!l_t1) { |
1556 | 0 | return 00; |
1557 | 0 | } |
1558 | | |
1559 | 938 | l_t1->encoder = isEncoder; |
1560 | | |
1561 | 938 | return l_t1; |
1562 | 938 | } |
1563 | | |
1564 | | |
1565 | | /** |
1566 | | * Destroys a previously created T1 handle |
1567 | | * |
1568 | | * @param p_t1 Tier 1 handle to destroy |
1569 | | */ |
1570 | | void opj_t1_destroy(opj_t1_t *p_t1) |
1571 | 938 | { |
1572 | 938 | if (! p_t1) { |
1573 | 0 | return; |
1574 | 0 | } |
1575 | | |
1576 | 938 | if (p_t1->data) { |
1577 | 938 | opj_aligned_free(p_t1->data); |
1578 | 938 | p_t1->data = 00; |
1579 | 938 | } |
1580 | | |
1581 | 938 | if (p_t1->flags) { |
1582 | 938 | opj_aligned_free(p_t1->flags); |
1583 | 938 | p_t1->flags = 00; |
1584 | 938 | } |
1585 | | |
1586 | 938 | opj_free(p_t1->cblkdatabuffer); |
1587 | | |
1588 | 938 | opj_free(p_t1); |
1589 | 938 | } |
1590 | | |
1591 | | typedef struct { |
1592 | | OPJ_BOOL whole_tile_decoding; |
1593 | | OPJ_UINT32 resno; |
1594 | | opj_tcd_cblk_dec_t* cblk; |
1595 | | opj_tcd_band_t* band; |
1596 | | opj_tcd_tilecomp_t* tilec; |
1597 | | opj_tccp_t* tccp; |
1598 | | OPJ_BOOL mustuse_cblkdatabuffer; |
1599 | | volatile OPJ_BOOL* pret; |
1600 | | opj_event_mgr_t *p_manager; |
1601 | | opj_mutex_t* p_manager_mutex; |
1602 | | OPJ_BOOL check_pterm; |
1603 | | } opj_t1_cblk_decode_processing_job_t; |
1604 | | |
1605 | | static void opj_t1_destroy_wrapper(void* t1) |
1606 | 938 | { |
1607 | 938 | opj_t1_destroy((opj_t1_t*) t1); |
1608 | 938 | } |
1609 | | |
1610 | | static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) |
1611 | 1.81M | { |
1612 | 1.81M | opj_tcd_cblk_dec_t* cblk; |
1613 | 1.81M | opj_tcd_band_t* band; |
1614 | 1.81M | opj_tcd_tilecomp_t* tilec; |
1615 | 1.81M | opj_tccp_t* tccp; |
1616 | 1.81M | OPJ_INT32* OPJ_RESTRICT datap; |
1617 | 1.81M | OPJ_UINT32 cblk_w, cblk_h; |
1618 | 1.81M | OPJ_INT32 x, y; |
1619 | 1.81M | OPJ_UINT32 i, j; |
1620 | 1.81M | opj_t1_cblk_decode_processing_job_t* job; |
1621 | 1.81M | opj_t1_t* t1; |
1622 | 1.81M | OPJ_UINT32 resno; |
1623 | 1.81M | OPJ_UINT32 tile_w; |
1624 | | |
1625 | 1.81M | job = (opj_t1_cblk_decode_processing_job_t*) user_data; |
1626 | | |
1627 | 1.81M | cblk = job->cblk; |
1628 | | |
1629 | 1.81M | if (!job->whole_tile_decoding) { |
1630 | 0 | cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); |
1631 | 0 | cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); |
1632 | |
|
1633 | 0 | cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) * |
1634 | 0 | cblk_w * cblk_h); |
1635 | 0 | if (cblk->decoded_data == NULL) { |
1636 | 0 | if (job->p_manager_mutex) { |
1637 | 0 | opj_mutex_lock(job->p_manager_mutex); |
1638 | 0 | } |
1639 | 0 | opj_event_msg(job->p_manager, EVT_ERROR, |
1640 | 0 | "Cannot allocate cblk->decoded_data\n"); |
1641 | 0 | if (job->p_manager_mutex) { |
1642 | 0 | opj_mutex_unlock(job->p_manager_mutex); |
1643 | 0 | } |
1644 | 0 | *(job->pret) = OPJ_FALSE; |
1645 | 0 | opj_free(job); |
1646 | 0 | return; |
1647 | 0 | } |
1648 | | /* Zero-init required */ |
1649 | 0 | memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h); |
1650 | 1.81M | } else if (cblk->decoded_data) { |
1651 | | /* Not sure if that code path can happen, but better be */ |
1652 | | /* safe than sorry */ |
1653 | 0 | opj_aligned_free(cblk->decoded_data); |
1654 | 0 | cblk->decoded_data = NULL; |
1655 | 0 | } |
1656 | | |
1657 | 1.81M | resno = job->resno; |
1658 | 1.81M | band = job->band; |
1659 | 1.81M | tilec = job->tilec; |
1660 | 1.81M | tccp = job->tccp; |
1661 | 1.81M | tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1 |
1662 | 1.81M | - |
1663 | 1.81M | tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); |
1664 | | |
1665 | 1.81M | if (!*(job->pret)) { |
1666 | 0 | opj_free(job); |
1667 | 0 | return; |
1668 | 0 | } |
1669 | | |
1670 | 1.81M | t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); |
1671 | 1.81M | if (t1 == NULL) { |
1672 | 938 | t1 = opj_t1_create(OPJ_FALSE); |
1673 | 938 | if (t1 == NULL) { |
1674 | 0 | opj_event_msg(job->p_manager, EVT_ERROR, |
1675 | 0 | "Cannot allocate Tier 1 handle\n"); |
1676 | 0 | *(job->pret) = OPJ_FALSE; |
1677 | 0 | opj_free(job); |
1678 | 0 | return; |
1679 | 0 | } |
1680 | 938 | if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) { |
1681 | 0 | opj_event_msg(job->p_manager, EVT_ERROR, |
1682 | 0 | "Unable to set t1 handle as TLS\n"); |
1683 | 0 | opj_t1_destroy(t1); |
1684 | 0 | *(job->pret) = OPJ_FALSE; |
1685 | 0 | opj_free(job); |
1686 | 0 | return; |
1687 | 0 | } |
1688 | 938 | } |
1689 | 1.81M | t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer; |
1690 | | |
1691 | 1.81M | if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) { |
1692 | 1.30k | if (OPJ_FALSE == opj_t1_ht_decode_cblk( |
1693 | 1.30k | t1, |
1694 | 1.30k | cblk, |
1695 | 1.30k | band->bandno, |
1696 | 1.30k | (OPJ_UINT32)tccp->roishift, |
1697 | 1.30k | tccp->cblksty, |
1698 | 1.30k | job->p_manager, |
1699 | 1.30k | job->p_manager_mutex, |
1700 | 1.30k | job->check_pterm)) { |
1701 | 29 | *(job->pret) = OPJ_FALSE; |
1702 | 29 | opj_free(job); |
1703 | 29 | return; |
1704 | 29 | } |
1705 | 1.81M | } else { |
1706 | 1.81M | if (OPJ_FALSE == opj_t1_decode_cblk( |
1707 | 1.81M | t1, |
1708 | 1.81M | cblk, |
1709 | 1.81M | band->bandno, |
1710 | 1.81M | (OPJ_UINT32)tccp->roishift, |
1711 | 1.81M | tccp->cblksty, |
1712 | 1.81M | job->p_manager, |
1713 | 1.81M | job->p_manager_mutex, |
1714 | 1.81M | job->check_pterm)) { |
1715 | 1 | *(job->pret) = OPJ_FALSE; |
1716 | 1 | opj_free(job); |
1717 | 1 | return; |
1718 | 1 | } |
1719 | 1.81M | } |
1720 | | |
1721 | 1.81M | x = cblk->x0 - band->x0; |
1722 | 1.81M | y = cblk->y0 - band->y0; |
1723 | 1.81M | if (band->bandno & 1) { |
1724 | 91.8k | opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; |
1725 | 91.8k | x += pres->x1 - pres->x0; |
1726 | 91.8k | } |
1727 | 1.81M | if (band->bandno & 2) { |
1728 | 90.5k | opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; |
1729 | 90.5k | y += pres->y1 - pres->y0; |
1730 | 90.5k | } |
1731 | | |
1732 | 1.81M | datap = cblk->decoded_data ? cblk->decoded_data : t1->data; |
1733 | 1.81M | cblk_w = t1->w; |
1734 | 1.81M | cblk_h = t1->h; |
1735 | | |
1736 | 1.81M | if (tccp->roishift) { |
1737 | 2 | if (tccp->roishift >= 31) { |
1738 | 0 | for (j = 0; j < cblk_h; ++j) { |
1739 | 0 | for (i = 0; i < cblk_w; ++i) { |
1740 | 0 | datap[(j * cblk_w) + i] = 0; |
1741 | 0 | } |
1742 | 0 | } |
1743 | 2 | } else { |
1744 | 2 | OPJ_INT32 thresh = 1 << tccp->roishift; |
1745 | 10 | for (j = 0; j < cblk_h; ++j) { |
1746 | 12 | for (i = 0; i < cblk_w; ++i) { |
1747 | 4 | OPJ_INT32 val = datap[(j * cblk_w) + i]; |
1748 | 4 | OPJ_INT32 mag = abs(val); |
1749 | 4 | if (mag >= thresh) { |
1750 | 0 | mag >>= tccp->roishift; |
1751 | 0 | datap[(j * cblk_w) + i] = val < 0 ? -mag : mag; |
1752 | 0 | } |
1753 | 4 | } |
1754 | 8 | } |
1755 | 2 | } |
1756 | 2 | } |
1757 | | |
1758 | | /* Both can be non NULL if for example decoding a full tile and then */ |
1759 | | /* partially a tile. In which case partial decoding should be the */ |
1760 | | /* priority */ |
1761 | 1.81M | assert((cblk->decoded_data != NULL) || (tilec->data != NULL)); |
1762 | | |
1763 | 1.81M | if (cblk->decoded_data) { |
1764 | 0 | OPJ_UINT32 cblk_size = cblk_w * cblk_h; |
1765 | 0 | if (tccp->qmfbid == 1) { |
1766 | 0 | for (i = 0; i < cblk_size; ++i) { |
1767 | 0 | datap[i] /= 2; |
1768 | 0 | } |
1769 | 0 | } else { /* if (tccp->qmfbid == 0) */ |
1770 | 0 | const float stepsize = 0.5f * band->stepsize; |
1771 | 0 | i = 0; |
1772 | 0 | #ifdef __SSE2__ |
1773 | 0 | { |
1774 | 0 | const __m128 xmm_stepsize = _mm_set1_ps(stepsize); |
1775 | 0 | for (; i < (cblk_size & ~15U); i += 16) { |
1776 | 0 | __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( |
1777 | 0 | datap + 0))); |
1778 | 0 | __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( |
1779 | 0 | datap + 4))); |
1780 | 0 | __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( |
1781 | 0 | datap + 8))); |
1782 | 0 | __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( |
1783 | 0 | datap + 12))); |
1784 | 0 | _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize)); |
1785 | 0 | _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize)); |
1786 | 0 | _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize)); |
1787 | 0 | _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize)); |
1788 | 0 | datap += 16; |
1789 | 0 | } |
1790 | 0 | } |
1791 | 0 | #endif |
1792 | 0 | for (; i < cblk_size; ++i) { |
1793 | 0 | OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize; |
1794 | 0 | memcpy(datap, &tmp, sizeof(tmp)); |
1795 | 0 | datap++; |
1796 | 0 | } |
1797 | 0 | } |
1798 | 1.81M | } else if (tccp->qmfbid == 1) { |
1799 | 1.74M | OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + |
1800 | 1.74M | (OPJ_SIZE_T)x]; |
1801 | 59.6M | for (j = 0; j < cblk_h; ++j) { |
1802 | | //positive -> round down aka. (83)/2 = 41.5 -> 41 |
1803 | | //negative -> round up aka. (-83)/2 = -41.5 -> -41 |
1804 | | #if defined(__AVX512F__) |
1805 | | OPJ_INT32* ptr_in = datap + (j * cblk_w); |
1806 | | OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w); |
1807 | | for (i = 0; i < cblk_w / 16; ++i) { |
1808 | | __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in)); |
1809 | | const __m512i add_avx = _mm512_srli_epi32(in_avx, 31); |
1810 | | in_avx = _mm512_add_epi32(in_avx, add_avx); |
1811 | | _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1)); |
1812 | | ptr_in += 16; |
1813 | | ptr_out += 16; |
1814 | | } |
1815 | | |
1816 | | for (i = 0; i < cblk_w % 16; ++i) { |
1817 | | ptr_out[i] = ptr_in[i] / 2; |
1818 | | } |
1819 | | #elif defined(__AVX2__) |
1820 | | OPJ_INT32* ptr_in = datap + (j * cblk_w); |
1821 | | OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w); |
1822 | | for (i = 0; i < cblk_w / 8; ++i) { |
1823 | | __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in)); |
1824 | | const __m256i add_avx = _mm256_srli_epi32(in_avx, 31); |
1825 | | in_avx = _mm256_add_epi32(in_avx, add_avx); |
1826 | | _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1)); |
1827 | | ptr_in += 8; |
1828 | | ptr_out += 8; |
1829 | | } |
1830 | | |
1831 | | for (i = 0; i < cblk_w % 8; ++i) { |
1832 | | ptr_out[i] = ptr_in[i] / 2; |
1833 | | } |
1834 | | #else |
1835 | 57.9M | i = 0; |
1836 | 1.10G | for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { |
1837 | 1.04G | OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U]; |
1838 | 1.04G | OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; |
1839 | 1.04G | OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; |
1840 | 1.04G | OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; |
1841 | 1.04G | ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2; |
1842 | 1.04G | ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2; |
1843 | 1.04G | ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2; |
1844 | 1.04G | ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2; |
1845 | 1.04G | } |
1846 | 60.5M | for (; i < cblk_w; ++i) { |
1847 | 2.62M | OPJ_INT32 tmp = datap[(j * cblk_w) + i]; |
1848 | 2.62M | ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2; |
1849 | 2.62M | } |
1850 | 57.9M | #endif |
1851 | 57.9M | } |
1852 | 1.74M | } else { /* if (tccp->qmfbid == 0) */ |
1853 | 64.7k | const float stepsize = 0.5f * band->stepsize; |
1854 | 64.7k | OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * |
1855 | 64.7k | tile_w + (OPJ_SIZE_T)x]; |
1856 | 2.68M | for (j = 0; j < cblk_h; ++j) { |
1857 | 2.62M | OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; |
1858 | 91.5M | for (i = 0; i < cblk_w; ++i) { |
1859 | 88.9M | OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize; |
1860 | 88.9M | *tiledp2 = tmp; |
1861 | 88.9M | datap++; |
1862 | 88.9M | tiledp2++; |
1863 | 88.9M | } |
1864 | 2.62M | tiledp += tile_w; |
1865 | 2.62M | } |
1866 | 64.7k | } |
1867 | | |
1868 | 1.81M | opj_free(job); |
1869 | 1.81M | } |
1870 | | |
1871 | | |
1872 | | void opj_t1_decode_cblks(opj_tcd_t* tcd, |
1873 | | volatile OPJ_BOOL* pret, |
1874 | | opj_tcd_tilecomp_t* tilec, |
1875 | | opj_tccp_t* tccp, |
1876 | | opj_event_mgr_t *p_manager, |
1877 | | opj_mutex_t* p_manager_mutex, |
1878 | | OPJ_BOOL check_pterm |
1879 | | ) |
1880 | 4.87k | { |
1881 | 4.87k | opj_thread_pool_t* tp = tcd->thread_pool; |
1882 | 4.87k | OPJ_UINT32 resno, bandno, precno, cblkno; |
1883 | | |
1884 | | #ifdef DEBUG_VERBOSE |
1885 | | OPJ_UINT32 codeblocks_decoded = 0; |
1886 | | printf("Enter opj_t1_decode_cblks()\n"); |
1887 | | #endif |
1888 | | |
1889 | 11.8k | for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) { |
1890 | 7.00k | opj_tcd_resolution_t* res = &tilec->resolutions[resno]; |
1891 | | |
1892 | 18.2k | for (bandno = 0; bandno < res->numbands; ++bandno) { |
1893 | 11.2k | opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; |
1894 | | |
1895 | 234k | for (precno = 0; precno < res->pw * res->ph; ++precno) { |
1896 | 223k | opj_tcd_precinct_t* precinct = &band->precincts[precno]; |
1897 | | |
1898 | 223k | if (!opj_tcd_is_subband_area_of_interest(tcd, |
1899 | 223k | tilec->compno, |
1900 | 223k | resno, |
1901 | 223k | band->bandno, |
1902 | 223k | (OPJ_UINT32)precinct->x0, |
1903 | 223k | (OPJ_UINT32)precinct->y0, |
1904 | 223k | (OPJ_UINT32)precinct->x1, |
1905 | 223k | (OPJ_UINT32)precinct->y1)) { |
1906 | 247 | for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { |
1907 | 0 | opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; |
1908 | 0 | if (cblk->decoded_data) { |
1909 | | #ifdef DEBUG_VERBOSE |
1910 | | printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", |
1911 | | cblk->x0, cblk->y0, resno, bandno); |
1912 | | #endif |
1913 | 0 | opj_aligned_free(cblk->decoded_data); |
1914 | 0 | cblk->decoded_data = NULL; |
1915 | 0 | } |
1916 | 0 | } |
1917 | 247 | continue; |
1918 | 247 | } |
1919 | | |
1920 | 2.03M | for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { |
1921 | 1.81M | opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; |
1922 | 1.81M | opj_t1_cblk_decode_processing_job_t* job; |
1923 | | |
1924 | 1.81M | if (!opj_tcd_is_subband_area_of_interest(tcd, |
1925 | 1.81M | tilec->compno, |
1926 | 1.81M | resno, |
1927 | 1.81M | band->bandno, |
1928 | 1.81M | (OPJ_UINT32)cblk->x0, |
1929 | 1.81M | (OPJ_UINT32)cblk->y0, |
1930 | 1.81M | (OPJ_UINT32)cblk->x1, |
1931 | 1.81M | (OPJ_UINT32)cblk->y1)) { |
1932 | 0 | if (cblk->decoded_data) { |
1933 | | #ifdef DEBUG_VERBOSE |
1934 | | printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", |
1935 | | cblk->x0, cblk->y0, resno, bandno); |
1936 | | #endif |
1937 | 0 | opj_aligned_free(cblk->decoded_data); |
1938 | 0 | cblk->decoded_data = NULL; |
1939 | 0 | } |
1940 | 0 | continue; |
1941 | 0 | } |
1942 | | |
1943 | 1.81M | if (!tcd->whole_tile_decoding) { |
1944 | 0 | OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); |
1945 | 0 | OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); |
1946 | 0 | if (cblk->decoded_data != NULL) { |
1947 | | #ifdef DEBUG_VERBOSE |
1948 | | printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n", |
1949 | | cblk->x0, cblk->y0, resno, bandno); |
1950 | | #endif |
1951 | 0 | continue; |
1952 | 0 | } |
1953 | 0 | if (cblk_w == 0 || cblk_h == 0) { |
1954 | 0 | continue; |
1955 | 0 | } |
1956 | | #ifdef DEBUG_VERBOSE |
1957 | | printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n", |
1958 | | cblk->x0, cblk->y0, resno, bandno); |
1959 | | #endif |
1960 | 0 | } |
1961 | | |
1962 | 1.81M | job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, |
1963 | 1.81M | sizeof(opj_t1_cblk_decode_processing_job_t)); |
1964 | 1.81M | if (!job) { |
1965 | 0 | *pret = OPJ_FALSE; |
1966 | 0 | return; |
1967 | 0 | } |
1968 | 1.81M | job->whole_tile_decoding = tcd->whole_tile_decoding; |
1969 | 1.81M | job->resno = resno; |
1970 | 1.81M | job->cblk = cblk; |
1971 | 1.81M | job->band = band; |
1972 | 1.81M | job->tilec = tilec; |
1973 | 1.81M | job->tccp = tccp; |
1974 | 1.81M | job->pret = pret; |
1975 | 1.81M | job->p_manager_mutex = p_manager_mutex; |
1976 | 1.81M | job->p_manager = p_manager; |
1977 | 1.81M | job->check_pterm = check_pterm; |
1978 | 1.81M | job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1; |
1979 | 1.81M | opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job); |
1980 | | #ifdef DEBUG_VERBOSE |
1981 | | codeblocks_decoded ++; |
1982 | | #endif |
1983 | 1.81M | if (!(*pret)) { |
1984 | 30 | return; |
1985 | 30 | } |
1986 | 1.81M | } /* cblkno */ |
1987 | 223k | } /* precno */ |
1988 | 11.2k | } /* bandno */ |
1989 | 7.00k | } /* resno */ |
1990 | | |
1991 | | #ifdef DEBUG_VERBOSE |
1992 | | printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded); |
1993 | | #endif |
1994 | 4.84k | return; |
1995 | 4.87k | } |
1996 | | |
1997 | | |
1998 | | static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, |
1999 | | opj_tcd_cblk_dec_t* cblk, |
2000 | | OPJ_UINT32 orient, |
2001 | | OPJ_UINT32 roishift, |
2002 | | OPJ_UINT32 cblksty, |
2003 | | opj_event_mgr_t *p_manager, |
2004 | | opj_mutex_t* p_manager_mutex, |
2005 | | OPJ_BOOL check_pterm) |
2006 | 1.81M | { |
2007 | 1.81M | opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ |
2008 | | |
2009 | 1.81M | OPJ_INT32 bpno_plus_one; |
2010 | 1.81M | OPJ_UINT32 passtype; |
2011 | 1.81M | OPJ_UINT32 segno, passno; |
2012 | 1.81M | OPJ_BYTE* cblkdata = NULL; |
2013 | 1.81M | OPJ_UINT32 cblkdataindex = 0; |
2014 | 1.81M | OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */ |
2015 | 1.81M | OPJ_INT32* original_t1_data = NULL; |
2016 | | |
2017 | 1.81M | mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); |
2018 | | |
2019 | 1.81M | if (!opj_t1_allocate_buffers( |
2020 | 1.81M | t1, |
2021 | 1.81M | (OPJ_UINT32)(cblk->x1 - cblk->x0), |
2022 | 1.81M | (OPJ_UINT32)(cblk->y1 - cblk->y0))) { |
2023 | 0 | return OPJ_FALSE; |
2024 | 0 | } |
2025 | | |
2026 | 1.81M | bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps); |
2027 | 1.81M | if (bpno_plus_one >= 31) { |
2028 | 1 | if (p_manager_mutex) { |
2029 | 1 | opj_mutex_lock(p_manager_mutex); |
2030 | 1 | } |
2031 | 1 | opj_event_msg(p_manager, EVT_WARNING, |
2032 | 1 | "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n", |
2033 | 1 | bpno_plus_one); |
2034 | 1 | if (p_manager_mutex) { |
2035 | 1 | opj_mutex_unlock(p_manager_mutex); |
2036 | 1 | } |
2037 | 1 | return OPJ_FALSE; |
2038 | 1 | } |
2039 | 1.81M | passtype = 2; |
2040 | | |
2041 | 1.81M | opj_mqc_resetstates(mqc); |
2042 | 1.81M | opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); |
2043 | 1.81M | opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); |
2044 | 1.81M | opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); |
2045 | | |
2046 | 1.81M | if (cblk->corrupted) { |
2047 | 0 | assert(cblk->numchunks == 0); |
2048 | 0 | return OPJ_TRUE; |
2049 | 0 | } |
2050 | | |
2051 | | /* Even if we have a single chunk, in multi-threaded decoding */ |
2052 | | /* the insertion of our synthetic marker might potentially override */ |
2053 | | /* valid codestream of other codeblocks decoded in parallel. */ |
2054 | 1.81M | if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer && |
2055 | 1.70M | cblk->numchunks > 0)) { |
2056 | 109k | OPJ_UINT32 i; |
2057 | 109k | OPJ_UINT32 cblk_len; |
2058 | | |
2059 | | /* Compute whole codeblock length from chunk lengths */ |
2060 | 109k | cblk_len = 0; |
2061 | 539k | for (i = 0; i < cblk->numchunks; i++) { |
2062 | 430k | cblk_len += cblk->chunks[i].len; |
2063 | 430k | } |
2064 | | |
2065 | | /* Allocate temporary memory if needed */ |
2066 | 109k | if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) { |
2067 | 2.16k | cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer, |
2068 | 2.16k | cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA); |
2069 | 2.16k | if (cblkdata == NULL) { |
2070 | 0 | return OPJ_FALSE; |
2071 | 0 | } |
2072 | 2.16k | t1->cblkdatabuffer = cblkdata; |
2073 | 2.16k | memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA); |
2074 | 2.16k | t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA; |
2075 | 2.16k | } |
2076 | | |
2077 | | /* Concatenate all chunks */ |
2078 | 109k | cblkdata = t1->cblkdatabuffer; |
2079 | 109k | cblk_len = 0; |
2080 | 539k | for (i = 0; i < cblk->numchunks; i++) { |
2081 | 430k | memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len); |
2082 | 430k | cblk_len += cblk->chunks[i].len; |
2083 | 430k | } |
2084 | 1.70M | } else if (cblk->numchunks == 1) { |
2085 | 175k | cblkdata = cblk->chunks[0].data; |
2086 | 1.52M | } else { |
2087 | | /* Not sure if that can happen in practice, but avoid Coverity to */ |
2088 | | /* think we will dereference a null cblkdta pointer */ |
2089 | 1.52M | return OPJ_TRUE; |
2090 | 1.52M | } |
2091 | | |
2092 | | /* For subtile decoding, directly decode in the decoded_data buffer of */ |
2093 | | /* the code-block. Hack t1->data to point to it, and restore it later */ |
2094 | 284k | if (cblk->decoded_data) { |
2095 | 0 | original_t1_data = t1->data; |
2096 | 0 | t1->data = cblk->decoded_data; |
2097 | 0 | } |
2098 | | |
2099 | 716k | for (segno = 0; segno < cblk->real_num_segs; ++segno) { |
2100 | 431k | opj_tcd_seg_t *seg = &cblk->segs[segno]; |
2101 | | |
2102 | | /* BYPASS mode */ |
2103 | 431k | type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) && |
2104 | 431k | (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ; |
2105 | | |
2106 | 431k | if (type == T1_TYPE_RAW) { |
2107 | 57.3k | opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len, |
2108 | 57.3k | OPJ_COMMON_CBLK_DATA_EXTRA); |
2109 | 374k | } else { |
2110 | 374k | opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len, |
2111 | 374k | OPJ_COMMON_CBLK_DATA_EXTRA); |
2112 | 374k | } |
2113 | 431k | cblkdataindex += seg->len; |
2114 | | |
2115 | 3.10M | for (passno = 0; (passno < seg->real_num_passes) && |
2116 | 3.10M | (bpno_plus_one >= 1); ++passno) { |
2117 | 2.67M | switch (passtype) { |
2118 | 853k | case 0: |
2119 | 853k | if (type == T1_TYPE_RAW) { |
2120 | 37.9k | opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); |
2121 | 815k | } else { |
2122 | 815k | opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty); |
2123 | 815k | } |
2124 | 853k | break; |
2125 | 798k | case 1: |
2126 | 798k | if (type == T1_TYPE_RAW) { |
2127 | 36.3k | opj_t1_dec_refpass_raw(t1, bpno_plus_one); |
2128 | 761k | } else { |
2129 | 761k | opj_t1_dec_refpass_mqc(t1, bpno_plus_one); |
2130 | 761k | } |
2131 | 798k | break; |
2132 | 1.02M | case 2: |
2133 | 1.02M | opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty); |
2134 | 1.02M | break; |
2135 | 2.67M | } |
2136 | | |
2137 | 2.67M | if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { |
2138 | 1.38M | opj_mqc_resetstates(mqc); |
2139 | 1.38M | opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); |
2140 | 1.38M | opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); |
2141 | 1.38M | opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); |
2142 | 1.38M | } |
2143 | 2.67M | if (++passtype == 3) { |
2144 | 1.02M | passtype = 0; |
2145 | 1.02M | bpno_plus_one--; |
2146 | 1.02M | } |
2147 | 2.67M | } |
2148 | | |
2149 | 431k | opq_mqc_finish_dec(mqc); |
2150 | 431k | } |
2151 | | |
2152 | 284k | if (check_pterm) { |
2153 | 149k | if (mqc->bp + 2 < mqc->end) { |
2154 | 31.0k | if (p_manager_mutex) { |
2155 | 31.0k | opj_mutex_lock(p_manager_mutex); |
2156 | 31.0k | } |
2157 | 31.0k | opj_event_msg(p_manager, EVT_WARNING, |
2158 | 31.0k | "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n", |
2159 | 31.0k | (int)(mqc->end - mqc->bp) - 2, |
2160 | 31.0k | (int)(mqc->bp - mqc->start), |
2161 | 31.0k | (int)(mqc->end - mqc->start)); |
2162 | 31.0k | if (p_manager_mutex) { |
2163 | 31.0k | opj_mutex_unlock(p_manager_mutex); |
2164 | 31.0k | } |
2165 | 118k | } else if (mqc->end_of_byte_stream_counter > 2) { |
2166 | 107k | if (p_manager_mutex) { |
2167 | 107k | opj_mutex_lock(p_manager_mutex); |
2168 | 107k | } |
2169 | 107k | opj_event_msg(p_manager, EVT_WARNING, |
2170 | 107k | "PTERM check failure: %d synthesized 0xFF markers read\n", |
2171 | 107k | mqc->end_of_byte_stream_counter); |
2172 | 107k | if (p_manager_mutex) { |
2173 | 107k | opj_mutex_unlock(p_manager_mutex); |
2174 | 107k | } |
2175 | 107k | } |
2176 | 149k | } |
2177 | | |
2178 | | /* Restore original t1->data is needed */ |
2179 | 284k | if (cblk->decoded_data) { |
2180 | 0 | t1->data = original_t1_data; |
2181 | 0 | } |
2182 | | |
2183 | 284k | return OPJ_TRUE; |
2184 | 284k | } |
2185 | | |
2186 | | |
/** Description of one code-block encoding job, as built by
 * opj_t1_encode_cblks() and consumed (then freed) by
 * opj_t1_cblk_encode_processor().
 */
typedef struct {
    /** Index of the component the code-block belongs to */
    OPJ_UINT32 compno;
    /** Index of the resolution the code-block belongs to */
    OPJ_UINT32 resno;
    /** Code-block to encode */
    opj_tcd_cblk_enc_t* cblk;
    /** Tile being encoded; its distotile field accumulates the job result */
    opj_tcd_tile_t *tile;
    /** Band containing the code-block */
    opj_tcd_band_t* band;
    /** Tile-component containing the band (source of the sample data) */
    opj_tcd_tilecomp_t* tilec;
    /** Coding parameters of the tile-component (qmfbid, cblksty) */
    opj_tccp_t* tccp;
    /** Forwarded as-is to opj_t1_encode_cblk() */
    const OPJ_FLOAT64 * mct_norms;
    /** Forwarded as-is to opj_t1_encode_cblk() */
    OPJ_UINT32 mct_numcomps;
    /** Shared status flag: jobs do nothing once it is false, and set it to
     *  false when they fail */
    volatile OPJ_BOOL* pret;
    /** Taken around the update of tile->distotile; may be NULL, in which
     *  case no locking is done */
    opj_mutex_t* mutex;
} opj_t1_cblk_encode_processing_job_t;
2200 | | |
2201 | | /** Procedure to deal with a asynchronous code-block encoding job. |
2202 | | * |
2203 | | * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure |
2204 | | * @param tls TLS handle. |
2205 | | */ |
2206 | | static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) |
2207 | 0 | { |
2208 | 0 | opj_t1_cblk_encode_processing_job_t* job = |
2209 | 0 | (opj_t1_cblk_encode_processing_job_t*)user_data; |
2210 | 0 | opj_tcd_cblk_enc_t* cblk = job->cblk; |
2211 | 0 | const opj_tcd_band_t* band = job->band; |
2212 | 0 | const opj_tcd_tilecomp_t* tilec = job->tilec; |
2213 | 0 | const opj_tccp_t* tccp = job->tccp; |
2214 | 0 | const OPJ_UINT32 resno = job->resno; |
2215 | 0 | opj_t1_t* t1; |
2216 | 0 | const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); |
2217 | |
|
2218 | 0 | OPJ_INT32* OPJ_RESTRICT tiledp; |
2219 | 0 | OPJ_UINT32 cblk_w; |
2220 | 0 | OPJ_UINT32 cblk_h; |
2221 | 0 | OPJ_UINT32 i, j; |
2222 | |
|
2223 | 0 | OPJ_INT32 x = cblk->x0 - band->x0; |
2224 | 0 | OPJ_INT32 y = cblk->y0 - band->y0; |
2225 | |
|
2226 | 0 | if (!*(job->pret)) { |
2227 | 0 | opj_free(job); |
2228 | 0 | return; |
2229 | 0 | } |
2230 | | |
2231 | 0 | t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); |
2232 | 0 | if (t1 == NULL) { |
2233 | 0 | t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */ |
2234 | 0 | opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); |
2235 | 0 | } |
2236 | |
|
2237 | 0 | if (band->bandno & 1) { |
2238 | 0 | opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; |
2239 | 0 | x += pres->x1 - pres->x0; |
2240 | 0 | } |
2241 | 0 | if (band->bandno & 2) { |
2242 | 0 | opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; |
2243 | 0 | y += pres->y1 - pres->y0; |
2244 | 0 | } |
2245 | |
|
2246 | 0 | if (!opj_t1_allocate_buffers( |
2247 | 0 | t1, |
2248 | 0 | (OPJ_UINT32)(cblk->x1 - cblk->x0), |
2249 | 0 | (OPJ_UINT32)(cblk->y1 - cblk->y0))) { |
2250 | 0 | *(job->pret) = OPJ_FALSE; |
2251 | 0 | opj_free(job); |
2252 | 0 | return; |
2253 | 0 | } |
2254 | | |
2255 | 0 | cblk_w = t1->w; |
2256 | 0 | cblk_h = t1->h; |
2257 | |
|
2258 | 0 | tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; |
2259 | |
|
2260 | 0 | if (tccp->qmfbid == 1) { |
2261 | | /* Do multiplication on unsigned type, even if the |
2262 | | * underlying type is signed, to avoid potential |
2263 | | * int overflow on large value (the output will be |
2264 | | * incorrect in such situation, but whatever...) |
2265 | | * This assumes complement-to-2 signed integer |
2266 | | * representation |
2267 | | * Fixes https://github.com/uclouvain/openjpeg/issues/1053 |
2268 | | */ |
2269 | 0 | OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; |
2270 | 0 | OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data; |
2271 | | /* Change from "natural" order to "zigzag" order of T1 passes */ |
2272 | 0 | for (j = 0; j < (cblk_h & ~3U); j += 4) { |
2273 | | #if defined(__AVX512F__) |
2274 | | const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13); |
2275 | | const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0); |
2276 | | OPJ_UINT32* ptr = tiledp_u; |
2277 | | for (i = 0; i < cblk_w / 16; ++i) { |
2278 | | // INPUT OUTPUT |
2279 | | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 |
2280 | | // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 |
2281 | | // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B |
2282 | | // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F |
2283 | | __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + |
2284 | | (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS); |
2285 | | __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + |
2286 | | (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS); |
2287 | | __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + |
2288 | | (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS); |
2289 | | __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + |
2290 | | (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS); |
2291 | | |
2292 | | __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2); |
2293 | | __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4); |
2294 | | __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2); |
2295 | | __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4); |
2296 | | |
2297 | | in1 = _mm512_unpacklo_epi64(tmp1, tmp2); |
2298 | | in2 = _mm512_unpacklo_epi64(tmp3, tmp4); |
2299 | | in3 = _mm512_unpackhi_epi64(tmp1, tmp2); |
2300 | | in4 = _mm512_unpackhi_epi64(tmp3, tmp4); |
2301 | | |
2302 | | _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1)); |
2303 | | _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3)); |
2304 | | _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2)); |
2305 | | _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4)); |
2306 | | |
2307 | | tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3); |
2308 | | tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4); |
2309 | | |
2310 | | _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1)); |
2311 | | _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2)); |
2312 | | _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1, |
2313 | | 0x1)); |
2314 | | _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2, |
2315 | | 0x1)); |
2316 | | _mm256_storeu_si256((__m256i*)(t1data + 48), |
2317 | | _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3))); |
2318 | | _mm256_storeu_si256((__m256i*)(t1data + 56), |
2319 | | _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4))); |
2320 | | t1data += 64; |
2321 | | ptr += 16; |
2322 | | } |
2323 | | for (i = 0; i < cblk_w % 16; ++i) { |
2324 | | t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS; |
2325 | | t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS; |
2326 | | t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS; |
2327 | | t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS; |
2328 | | t1data += 4; |
2329 | | ptr += 1; |
2330 | | } |
2331 | | #elif defined(__AVX2__) |
2332 | | OPJ_UINT32* ptr = tiledp_u; |
2333 | | for (i = 0; i < cblk_w / 8; ++i) { |
2334 | | // INPUT OUTPUT |
2335 | | // 00 01 02 03 04 05 06 07 00 10 20 30 01 11 21 31 |
2336 | | // 10 11 12 13 14 15 16 17 02 12 22 32 03 13 23 33 |
2337 | | // 20 21 22 23 24 25 26 27 04 14 24 34 05 15 25 35 |
2338 | | // 30 31 32 33 34 35 36 37 06 16 26 36 07 17 27 37 |
2339 | | __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + |
2340 | | (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS); |
2341 | | __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + |
2342 | | (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS); |
2343 | | __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + |
2344 | | (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS); |
2345 | | __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + |
2346 | | (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS); |
2347 | | |
2348 | | __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2); |
2349 | | __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4); |
2350 | | __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2); |
2351 | | __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4); |
2352 | | |
2353 | | in1 = _mm256_unpacklo_epi64(tmp1, tmp2); |
2354 | | in2 = _mm256_unpacklo_epi64(tmp3, tmp4); |
2355 | | in3 = _mm256_unpackhi_epi64(tmp1, tmp2); |
2356 | | in4 = _mm256_unpackhi_epi64(tmp3, tmp4); |
2357 | | |
2358 | | _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1)); |
2359 | | _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3)); |
2360 | | _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2)); |
2361 | | _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4)); |
2362 | | _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3, |
2363 | | 0x31)); |
2364 | | _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4, |
2365 | | 0x31)); |
2366 | | t1data += 32; |
2367 | | ptr += 8; |
2368 | | } |
2369 | | for (i = 0; i < cblk_w % 8; ++i) { |
2370 | | t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS; |
2371 | | t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS; |
2372 | | t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS; |
2373 | | t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS; |
2374 | | t1data += 4; |
2375 | | ptr += 1; |
2376 | | } |
2377 | | #else |
2378 | 0 | for (i = 0; i < cblk_w; ++i) { |
2379 | 0 | t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS; |
2380 | 0 | t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS; |
2381 | 0 | t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS; |
2382 | 0 | t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS; |
2383 | 0 | t1data += 4; |
2384 | 0 | } |
2385 | 0 | #endif |
2386 | 0 | } |
2387 | 0 | if (j < cblk_h) { |
2388 | 0 | for (i = 0; i < cblk_w; ++i) { |
2389 | 0 | OPJ_UINT32 k; |
2390 | 0 | for (k = j; k < cblk_h; k++) { |
2391 | 0 | t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS; |
2392 | 0 | t1data ++; |
2393 | 0 | } |
2394 | 0 | } |
2395 | 0 | } |
2396 | 0 | } else { /* if (tccp->qmfbid == 0) */ |
2397 | 0 | OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp; |
2398 | 0 | OPJ_INT32* OPJ_RESTRICT t1data = t1->data; |
2399 | | /* Change from "natural" order to "zigzag" order of T1 passes */ |
2400 | 0 | for (j = 0; j < (cblk_h & ~3U); j += 4) { |
2401 | 0 | for (i = 0; i < cblk_w; ++i) { |
2402 | 0 | t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] / |
2403 | 0 | band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); |
2404 | 0 | t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] / |
2405 | 0 | band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); |
2406 | 0 | t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] / |
2407 | 0 | band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); |
2408 | 0 | t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] / |
2409 | 0 | band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); |
2410 | 0 | t1data += 4; |
2411 | 0 | } |
2412 | 0 | } |
2413 | 0 | if (j < cblk_h) { |
2414 | 0 | for (i = 0; i < cblk_w; ++i) { |
2415 | 0 | OPJ_UINT32 k; |
2416 | 0 | for (k = j; k < cblk_h; k++) { |
2417 | 0 | t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize) |
2418 | 0 | * (1 << T1_NMSEDEC_FRACBITS)); |
2419 | 0 | t1data ++; |
2420 | 0 | } |
2421 | 0 | } |
2422 | 0 | } |
2423 | 0 | } |
2424 | |
|
2425 | 0 | { |
2426 | 0 | OPJ_FLOAT64 cumwmsedec = |
2427 | 0 | opj_t1_encode_cblk( |
2428 | 0 | t1, |
2429 | 0 | cblk, |
2430 | 0 | band->bandno, |
2431 | 0 | job->compno, |
2432 | 0 | tilec->numresolutions - 1 - resno, |
2433 | 0 | tccp->qmfbid, |
2434 | 0 | band->stepsize, |
2435 | 0 | tccp->cblksty, |
2436 | 0 | job->tile->numcomps, |
2437 | 0 | job->mct_norms, |
2438 | 0 | job->mct_numcomps); |
2439 | 0 | if (job->mutex) { |
2440 | 0 | opj_mutex_lock(job->mutex); |
2441 | 0 | } |
2442 | 0 | job->tile->distotile += cumwmsedec; |
2443 | 0 | if (job->mutex) { |
2444 | 0 | opj_mutex_unlock(job->mutex); |
2445 | 0 | } |
2446 | 0 | } |
2447 | |
|
2448 | 0 | opj_free(job); |
2449 | 0 | } |
2450 | | |
2451 | | |
2452 | | OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, |
2453 | | opj_tcd_tile_t *tile, |
2454 | | opj_tcp_t *tcp, |
2455 | | const OPJ_FLOAT64 * mct_norms, |
2456 | | OPJ_UINT32 mct_numcomps |
2457 | | ) |
2458 | 0 | { |
2459 | 0 | volatile OPJ_BOOL ret = OPJ_TRUE; |
2460 | 0 | opj_thread_pool_t* tp = tcd->thread_pool; |
2461 | 0 | OPJ_UINT32 compno, resno, bandno, precno, cblkno; |
2462 | 0 | opj_mutex_t* mutex = opj_mutex_create(); |
2463 | |
|
2464 | 0 | tile->distotile = 0; |
2465 | |
|
2466 | 0 | for (compno = 0; compno < tile->numcomps; ++compno) { |
2467 | 0 | opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; |
2468 | 0 | opj_tccp_t* tccp = &tcp->tccps[compno]; |
2469 | |
|
2470 | 0 | for (resno = 0; resno < tilec->numresolutions; ++resno) { |
2471 | 0 | opj_tcd_resolution_t *res = &tilec->resolutions[resno]; |
2472 | |
|
2473 | 0 | for (bandno = 0; bandno < res->numbands; ++bandno) { |
2474 | 0 | opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; |
2475 | | |
2476 | | /* Skip empty bands */ |
2477 | 0 | if (opj_tcd_is_band_empty(band)) { |
2478 | 0 | continue; |
2479 | 0 | } |
2480 | 0 | for (precno = 0; precno < res->pw * res->ph; ++precno) { |
2481 | 0 | opj_tcd_precinct_t *prc = &band->precincts[precno]; |
2482 | |
|
2483 | 0 | for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { |
2484 | 0 | opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno]; |
2485 | |
|
2486 | 0 | opj_t1_cblk_encode_processing_job_t* job = |
2487 | 0 | (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1, |
2488 | 0 | sizeof(opj_t1_cblk_encode_processing_job_t)); |
2489 | 0 | if (!job) { |
2490 | 0 | ret = OPJ_FALSE; |
2491 | 0 | goto end; |
2492 | 0 | } |
2493 | 0 | job->compno = compno; |
2494 | 0 | job->tile = tile; |
2495 | 0 | job->resno = resno; |
2496 | 0 | job->cblk = cblk; |
2497 | 0 | job->band = band; |
2498 | 0 | job->tilec = tilec; |
2499 | 0 | job->tccp = tccp; |
2500 | 0 | job->mct_norms = mct_norms; |
2501 | 0 | job->mct_numcomps = mct_numcomps; |
2502 | 0 | job->pret = &ret; |
2503 | 0 | job->mutex = mutex; |
2504 | 0 | opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job); |
2505 | |
|
2506 | 0 | } /* cblkno */ |
2507 | 0 | } /* precno */ |
2508 | 0 | } /* bandno */ |
2509 | 0 | } /* resno */ |
2510 | 0 | } /* compno */ |
2511 | | |
2512 | 0 | end: |
2513 | 0 | opj_thread_pool_wait_completion(tcd->thread_pool, 0); |
2514 | 0 | if (mutex) { |
2515 | 0 | opj_mutex_destroy(mutex); |
2516 | 0 | } |
2517 | |
|
2518 | 0 | return ret; |
2519 | 0 | } |
2520 | | |
2521 | | /* Returns whether the pass (bpno, passtype) is terminated */ |
2522 | | static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk, |
2523 | | OPJ_UINT32 cblksty, |
2524 | | OPJ_INT32 bpno, |
2525 | | OPJ_UINT32 passtype) |
2526 | 0 | { |
2527 | | /* Is it the last cleanup pass ? */ |
2528 | 0 | if (passtype == 2 && bpno == 0) { |
2529 | 0 | return OPJ_TRUE; |
2530 | 0 | } |
2531 | | |
2532 | 0 | if (cblksty & J2K_CCP_CBLKSTY_TERMALL) { |
2533 | 0 | return OPJ_TRUE; |
2534 | 0 | } |
2535 | | |
2536 | 0 | if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) { |
2537 | | /* For bypass arithmetic bypass, terminate the 4th cleanup pass */ |
2538 | 0 | if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) { |
2539 | 0 | return OPJ_TRUE; |
2540 | 0 | } |
2541 | | /* and beyond terminate all the magnitude refinement passes (in raw) */ |
2542 | | /* and cleanup passes (in MQC) */ |
2543 | 0 | if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) { |
2544 | 0 | return OPJ_TRUE; |
2545 | 0 | } |
2546 | 0 | } |
2547 | | |
2548 | 0 | return OPJ_FALSE; |
2549 | 0 | } |
2550 | | |
2551 | | |
2552 | | static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1, |
2553 | | opj_tcd_cblk_enc_t* cblk, |
2554 | | OPJ_UINT32 orient, |
2555 | | OPJ_UINT32 compno, |
2556 | | OPJ_UINT32 level, |
2557 | | OPJ_UINT32 qmfbid, |
2558 | | OPJ_FLOAT64 stepsize, |
2559 | | OPJ_UINT32 cblksty, |
2560 | | OPJ_UINT32 numcomps, |
2561 | | const OPJ_FLOAT64 * mct_norms, |
2562 | | OPJ_UINT32 mct_numcomps) |
2563 | 0 | { |
2564 | 0 | OPJ_FLOAT64 cumwmsedec = 0.0; |
2565 | |
|
2566 | 0 | opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ |
2567 | |
|
2568 | 0 | OPJ_UINT32 passno; |
2569 | 0 | OPJ_INT32 bpno; |
2570 | 0 | OPJ_UINT32 passtype; |
2571 | 0 | OPJ_INT32 nmsedec = 0; |
2572 | 0 | OPJ_INT32 max; |
2573 | 0 | OPJ_UINT32 i, j; |
2574 | 0 | OPJ_BYTE type = T1_TYPE_MQ; |
2575 | 0 | OPJ_FLOAT64 tempwmsedec; |
2576 | 0 | OPJ_INT32* datap; |
2577 | |
|
2578 | | #ifdef EXTRA_DEBUG |
2579 | | printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n", |
2580 | | cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level); |
2581 | | #endif |
2582 | |
|
2583 | 0 | mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); |
2584 | |
|
2585 | 0 | max = 0; |
2586 | 0 | datap = t1->data; |
2587 | 0 | for (j = 0; j < t1->h; ++j) { |
2588 | 0 | const OPJ_UINT32 w = t1->w; |
2589 | 0 | for (i = 0; i < w; ++i, ++datap) { |
2590 | 0 | OPJ_INT32 tmp = *datap; |
2591 | 0 | if (tmp < 0) { |
2592 | 0 | OPJ_UINT32 tmp_unsigned; |
2593 | 0 | if (tmp == INT_MIN) { |
2594 | | /* To avoid undefined behaviour when negating INT_MIN */ |
2595 | | /* but if we go here, it means we have supplied an input */ |
2596 | | /* with more bit depth than we we can really support. */ |
2597 | | /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */ |
2598 | 0 | tmp = INT_MIN + 1; |
2599 | 0 | } |
2600 | 0 | max = opj_int_max(max, -tmp); |
2601 | 0 | tmp_unsigned = opj_to_smr(tmp); |
2602 | 0 | memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32)); |
2603 | 0 | } else { |
2604 | 0 | max = opj_int_max(max, tmp); |
2605 | 0 | } |
2606 | 0 | } |
2607 | 0 | } |
2608 | |
|
2609 | 0 | cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) - |
2610 | 0 | T1_NMSEDEC_FRACBITS) : 0; |
2611 | 0 | if (cblk->numbps == 0) { |
2612 | 0 | cblk->totalpasses = 0; |
2613 | 0 | return cumwmsedec; |
2614 | 0 | } |
2615 | | |
2616 | 0 | bpno = (OPJ_INT32)(cblk->numbps - 1); |
2617 | 0 | passtype = 2; |
2618 | |
|
2619 | 0 | opj_mqc_resetstates(mqc); |
2620 | 0 | opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); |
2621 | 0 | opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); |
2622 | 0 | opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); |
2623 | 0 | opj_mqc_init_enc(mqc, cblk->data); |
2624 | |
|
2625 | 0 | for (passno = 0; bpno >= 0; ++passno) { |
2626 | 0 | opj_tcd_pass_t *pass = &cblk->passes[passno]; |
2627 | 0 | type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) && |
2628 | 0 | (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ; |
2629 | | |
2630 | | /* If the previous pass was terminating, we need to reset the encoder */ |
2631 | 0 | if (passno > 0 && cblk->passes[passno - 1].term) { |
2632 | 0 | if (type == T1_TYPE_RAW) { |
2633 | 0 | opj_mqc_bypass_init_enc(mqc); |
2634 | 0 | } else { |
2635 | 0 | opj_mqc_restart_init_enc(mqc); |
2636 | 0 | } |
2637 | 0 | } |
2638 | |
|
2639 | 0 | switch (passtype) { |
2640 | 0 | case 0: |
2641 | 0 | opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty); |
2642 | 0 | break; |
2643 | 0 | case 1: |
2644 | 0 | opj_t1_enc_refpass(t1, bpno, &nmsedec, type); |
2645 | 0 | break; |
2646 | 0 | case 2: |
2647 | 0 | opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty); |
2648 | | /* code switch SEGMARK (i.e. SEGSYM) */ |
2649 | 0 | if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) { |
2650 | 0 | opj_mqc_segmark_enc(mqc); |
2651 | 0 | } |
2652 | 0 | break; |
2653 | 0 | } |
2654 | | |
2655 | 0 | tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid, |
2656 | 0 | stepsize, numcomps, mct_norms, mct_numcomps) ; |
2657 | 0 | cumwmsedec += tempwmsedec; |
2658 | 0 | pass->distortiondec = cumwmsedec; |
2659 | |
|
2660 | 0 | if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) { |
2661 | | /* If it is a terminated pass, terminate it */ |
2662 | 0 | if (type == T1_TYPE_RAW) { |
2663 | 0 | opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM); |
2664 | 0 | } else { |
2665 | 0 | if (cblksty & J2K_CCP_CBLKSTY_PTERM) { |
2666 | 0 | opj_mqc_erterm_enc(mqc); |
2667 | 0 | } else { |
2668 | 0 | opj_mqc_flush(mqc); |
2669 | 0 | } |
2670 | 0 | } |
2671 | 0 | pass->term = 1; |
2672 | 0 | pass->rate = opj_mqc_numbytes(mqc); |
2673 | 0 | } else { |
2674 | | /* Non terminated pass */ |
2675 | 0 | OPJ_UINT32 rate_extra_bytes; |
2676 | 0 | if (type == T1_TYPE_RAW) { |
2677 | 0 | rate_extra_bytes = opj_mqc_bypass_get_extra_bytes( |
2678 | 0 | mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM)); |
2679 | 0 | } else { |
2680 | 0 | rate_extra_bytes = 3; |
2681 | 0 | } |
2682 | 0 | pass->term = 0; |
2683 | 0 | pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes; |
2684 | 0 | } |
2685 | |
|
2686 | 0 | if (++passtype == 3) { |
2687 | 0 | passtype = 0; |
2688 | 0 | bpno--; |
2689 | 0 | } |
2690 | | |
2691 | | /* Code-switch "RESET" */ |
2692 | 0 | if (cblksty & J2K_CCP_CBLKSTY_RESET) { |
2693 | 0 | opj_mqc_reset_enc(mqc); |
2694 | 0 | } |
2695 | 0 | } |
2696 | | |
2697 | 0 | cblk->totalpasses = passno; |
2698 | |
|
2699 | 0 | if (cblk->totalpasses) { |
2700 | | /* Make sure that pass rates are increasing */ |
2701 | 0 | OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc); |
2702 | 0 | for (passno = cblk->totalpasses; passno > 0;) { |
2703 | 0 | opj_tcd_pass_t *pass = &cblk->passes[--passno]; |
2704 | 0 | if (pass->rate > last_pass_rate) { |
2705 | 0 | pass->rate = last_pass_rate; |
2706 | 0 | } else { |
2707 | 0 | last_pass_rate = pass->rate; |
2708 | 0 | } |
2709 | 0 | } |
2710 | 0 | } |
2711 | |
|
2712 | 0 | for (passno = 0; passno < cblk->totalpasses; passno++) { |
2713 | 0 | opj_tcd_pass_t *pass = &cblk->passes[passno]; |
2714 | | |
2715 | | /* Prevent generation of FF as last data byte of a pass*/ |
2716 | | /* For terminating passes, the flushing procedure ensured this already */ |
2717 | 0 | assert(pass->rate > 0); |
2718 | 0 | if (cblk->data[pass->rate - 1] == 0xFF) { |
2719 | 0 | pass->rate--; |
2720 | 0 | } |
2721 | 0 | pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate); |
2722 | 0 | } |
2723 | |
|
2724 | | #ifdef EXTRA_DEBUG |
2725 | | printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0); |
2726 | | |
2727 | | /* Check that there not 0xff >=0x90 sequences */ |
2728 | | if (cblk->totalpasses) { |
2729 | | OPJ_UINT32 i; |
2730 | | OPJ_UINT32 len = opj_mqc_numbytes(mqc); |
2731 | | for (i = 1; i < len; ++i) { |
2732 | | if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) { |
2733 | | printf("0xff %02x at offset %d\n", cblk->data[i], i - 1); |
2734 | | abort(); |
2735 | | } |
2736 | | } |
2737 | | } |
2738 | | #endif |
2739 | |
|
2740 | 0 | return cumwmsedec; |
2741 | 0 | } |