Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/openjpeg/src/lib/openjp2/t1.c
Line
Count
Source
1
/*
2
 * The copyright in this software is being made available under the 2-clauses
3
 * BSD License, included below. This software may be subject to other third
4
 * party and contributor rights, including patent rights, and no such rights
5
 * are granted under this license.
6
 *
7
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8
 * Copyright (c) 2002-2014, Professor Benoit Macq
9
 * Copyright (c) 2001-2003, David Janssens
10
 * Copyright (c) 2002-2003, Yannick Verschueren
11
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
12
 * Copyright (c) 2003-2014, Antonin Descampe
13
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
14
 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15
 * Copyright (c) 2012, Carl Hetherington
16
 * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17
 * All rights reserved.
18
 *
19
 * Redistribution and use in source and binary forms, with or without
20
 * modification, are permitted provided that the following conditions
21
 * are met:
22
 * 1. Redistributions of source code must retain the above copyright
23
 *    notice, this list of conditions and the following disclaimer.
24
 * 2. Redistributions in binary form must reproduce the above copyright
25
 *    notice, this list of conditions and the following disclaimer in the
26
 *    documentation and/or other materials provided with the distribution.
27
 *
28
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
 * POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
#define OPJ_SKIP_POISON
42
#include "opj_includes.h"
43
44
#ifdef __SSE__
45
#include <xmmintrin.h>
46
#endif
47
#ifdef __SSE2__
48
#include <emmintrin.h>
49
#endif
50
#if (defined(__AVX2__) || defined(__AVX512F__))
51
#include <immintrin.h>
52
#endif
53
54
#if defined(__GNUC__)
55
#pragma GCC poison malloc calloc realloc free
56
#endif
57
58
#include "t1_luts.h"
59
60
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
61
/*@{*/
62
63
24.1k
#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)])
64
65
251k
#define opj_t1_setcurctx(curctx, ctxno)  curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
66
67
/* Macros to deal with signed integer with just MSB bit set for
68
 * negative values (smr = signed magnitude representation) */
69
250k
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
70
21.1k
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
71
21.1k
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U))
72
73
74
/** @name Local static functions */
75
/*@{*/
76
77
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
78
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
79
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
80
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
81
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
82
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
83
                                       OPJ_UINT32 vsc);
84
85
86
/**
87
Decode significant pass
88
*/
89
90
static INLINE void opj_t1_dec_sigpass_step_raw(
91
    opj_t1_t *t1,
92
    opj_flag_t *flagsp,
93
    OPJ_INT32 *datap,
94
    OPJ_INT32 oneplushalf,
95
    OPJ_UINT32 vsc,
96
    OPJ_UINT32 row);
97
static INLINE void opj_t1_dec_sigpass_step_mqc(
98
    opj_t1_t *t1,
99
    opj_flag_t *flagsp,
100
    OPJ_INT32 *datap,
101
    OPJ_INT32 oneplushalf,
102
    OPJ_UINT32 row,
103
    OPJ_UINT32 flags_stride,
104
    OPJ_UINT32 vsc);
105
106
/**
107
Encode significant pass
108
*/
109
static void opj_t1_enc_sigpass(opj_t1_t *t1,
110
                               OPJ_INT32 bpno,
111
                               OPJ_INT32 *nmsedec,
112
                               OPJ_BYTE type,
113
                               OPJ_UINT32 cblksty);
114
115
/**
116
Decode significant pass
117
*/
118
static void opj_t1_dec_sigpass_raw(
119
    opj_t1_t *t1,
120
    OPJ_INT32 bpno,
121
    OPJ_INT32 cblksty);
122
123
/**
124
Encode refinement pass
125
*/
126
static void opj_t1_enc_refpass(opj_t1_t *t1,
127
                               OPJ_INT32 bpno,
128
                               OPJ_INT32 *nmsedec,
129
                               OPJ_BYTE type);
130
131
/**
132
Decode refinement pass
133
*/
134
static void opj_t1_dec_refpass_raw(
135
    opj_t1_t *t1,
136
    OPJ_INT32 bpno);
137
138
139
/**
140
Decode refinement pass
141
*/
142
143
static INLINE void  opj_t1_dec_refpass_step_raw(
144
    opj_t1_t *t1,
145
    opj_flag_t *flagsp,
146
    OPJ_INT32 *datap,
147
    OPJ_INT32 poshalf,
148
    OPJ_UINT32 row);
149
static INLINE void opj_t1_dec_refpass_step_mqc(
150
    opj_t1_t *t1,
151
    opj_flag_t *flagsp,
152
    OPJ_INT32 *datap,
153
    OPJ_INT32 poshalf,
154
    OPJ_UINT32 row);
155
156
157
/**
158
Decode clean-up pass
159
*/
160
161
static void opj_t1_dec_clnpass_step(
162
    opj_t1_t *t1,
163
    opj_flag_t *flagsp,
164
    OPJ_INT32 *datap,
165
    OPJ_INT32 oneplushalf,
166
    OPJ_UINT32 row,
167
    OPJ_UINT32 vsc);
168
169
/**
170
Encode clean-up pass
171
*/
172
static void opj_t1_enc_clnpass(
173
    opj_t1_t *t1,
174
    OPJ_INT32 bpno,
175
    OPJ_INT32 *nmsedec,
176
    OPJ_UINT32 cblksty);
177
178
static OPJ_FLOAT64 opj_t1_getwmsedec(
179
    OPJ_INT32 nmsedec,
180
    OPJ_UINT32 compno,
181
    OPJ_UINT32 level,
182
    OPJ_UINT32 orient,
183
    OPJ_INT32 bpno,
184
    OPJ_UINT32 qmfbid,
185
    OPJ_FLOAT64 stepsize,
186
    OPJ_UINT32 numcomps,
187
    const OPJ_FLOAT64 * mct_norms,
188
    OPJ_UINT32 mct_numcomps);
189
190
/** Return "cumwmsedec" that should be used to increase tile->distotile */
191
static double opj_t1_encode_cblk(opj_t1_t *t1,
192
                                 opj_tcd_cblk_enc_t* cblk,
193
                                 OPJ_UINT32 orient,
194
                                 OPJ_UINT32 compno,
195
                                 OPJ_UINT32 level,
196
                                 OPJ_UINT32 qmfbid,
197
                                 OPJ_FLOAT64 stepsize,
198
                                 OPJ_UINT32 cblksty,
199
                                 OPJ_UINT32 numcomps,
200
                                 const OPJ_FLOAT64 * mct_norms,
201
                                 OPJ_UINT32 mct_numcomps);
202
203
/**
204
Decode 1 code-block
205
@param t1 T1 handle
206
@param cblk Code-block coding parameters
207
@param orient
208
@param roishift Region of interest shifting value
209
@param cblksty Code-block style
210
@param p_manager the event manager
211
@param p_manager_mutex mutex for the event manager
212
@param check_pterm whether PTERM correct termination should be checked
213
*/
214
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
215
                                   opj_tcd_cblk_dec_t* cblk,
216
                                   OPJ_UINT32 orient,
217
                                   OPJ_UINT32 roishift,
218
                                   OPJ_UINT32 cblksty,
219
                                   opj_event_mgr_t *p_manager,
220
                                   opj_mutex_t* p_manager_mutex,
221
                                   OPJ_BOOL check_pterm);
222
223
/**
224
Decode 1 HT code-block
225
@param t1 T1 handle
226
@param cblk Code-block coding parameters
227
@param orient
228
@param roishift Region of interest shifting value
229
@param cblksty Code-block style
230
@param p_manager the event manager
231
@param p_manager_mutex mutex for the event manager
232
@param check_pterm whether PTERM correct termination should be checked
233
*/
234
OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
235
                               opj_tcd_cblk_dec_t* cblk,
236
                               OPJ_UINT32 orient,
237
                               OPJ_UINT32 roishift,
238
                               OPJ_UINT32 cblksty,
239
                               opj_event_mgr_t *p_manager,
240
                               opj_mutex_t* p_manager_mutex,
241
                               OPJ_BOOL check_pterm);
242
243
244
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
245
                                        OPJ_UINT32 w,
246
                                        OPJ_UINT32 h);
247
248
/*@}*/
249
250
/*@}*/
251
252
/* ----------------------------------------------------------------------- */
253
254
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
255
20.6k
{
256
20.6k
    return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
257
20.6k
}
258
259
static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
260
        OPJ_UINT32 pfX,
261
        OPJ_UINT32 nfX,
262
        OPJ_UINT32 ci)
263
21.1k
{
264
    /*
265
      0 pfX T1_CHI_THIS           T1_LUT_SGN_W
266
      1 tfX T1_SIGMA_1            T1_LUT_SIG_N
267
      2 nfX T1_CHI_THIS           T1_LUT_SGN_E
268
      3 tfX T1_SIGMA_3            T1_LUT_SIG_W
269
      4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
270
      5 tfX T1_SIGMA_5            T1_LUT_SIG_E
271
      6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
272
      7 tfX T1_SIGMA_7            T1_LUT_SIG_S
273
    */
274
275
21.1k
    OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
276
21.1k
                                         T1_SIGMA_7);
277
278
21.1k
    lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
279
21.1k
    lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
280
21.1k
    if (ci == 0U) {
281
7.00k
        lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
282
14.1k
    } else {
283
14.1k
        lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
284
14.1k
    }
285
21.1k
    lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
286
21.1k
    return lu;
287
21.1k
}
288
289
static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
290
21.1k
{
291
21.1k
    return lut_ctxno_sc[lu];
292
21.1k
}
293
294
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
295
208k
{
296
208k
    OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
297
208k
    OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
298
208k
    return tmp2;
299
208k
}
300
301
static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
302
21.1k
{
303
21.1k
    return lut_spb[lu];
304
21.1k
}
305
306
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
307
21.1k
{
308
21.1k
    if (bitpos > 0) {
309
21.1k
        return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
310
21.1k
    }
311
312
0
    return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
313
21.1k
}
314
315
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
316
208k
{
317
208k
    if (bitpos > 0) {
318
186k
        return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
319
186k
    }
320
321
21.1k
    return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
322
208k
}
323
324
21.1k
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
325
21.1k
{ \
326
21.1k
    /* east */ \
327
21.1k
    flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
328
21.1k
 \
329
21.1k
    /* mark target as significant */ \
330
21.1k
    flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
331
21.1k
 \
332
21.1k
    /* west */ \
333
21.1k
    flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
334
21.1k
 \
335
21.1k
    /* north-west, north, north-east */ \
336
21.1k
    if (ci == 0U && !(vsc)) { \
337
7.00k
        opj_flag_t* north = flagsp - (stride); \
338
7.00k
        *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
339
7.00k
        north[-1] |= T1_SIGMA_17; \
340
7.00k
        north[1] |= T1_SIGMA_15; \
341
7.00k
    } \
342
21.1k
 \
343
21.1k
    /* south-west, south, south-east */ \
344
21.1k
    if (ci == 3U) { \
345
3.93k
        opj_flag_t* south = flagsp + (stride); \
346
3.93k
        *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
347
3.93k
        south[-1] |= T1_SIGMA_2; \
348
3.93k
        south[1] |= T1_SIGMA_0; \
349
3.93k
    } \
350
21.1k
}
351
352
353
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
354
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
355
                                       OPJ_UINT32 vsc)
356
21.1k
{
357
21.1k
    opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
358
21.1k
}
359
360
/**
361
Encode significant pass
362
*/
363
208k
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
364
208k
{ \
365
208k
    OPJ_UINT32 v; \
366
208k
    const OPJ_UINT32 ci = (ciIn); \
367
208k
    const OPJ_UINT32 vsc = (vscIn); \
368
208k
    const OPJ_INT32* l_datap = (datapIn); \
369
208k
    opj_flag_t* flagsp = (flagspIn); \
370
208k
    OPJ_UINT32 const flags = *flagsp; \
371
208k
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
372
208k
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
373
0
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
374
0
        v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
375
0
/* #ifdef DEBUG_ENC_SIG */ \
376
0
/*        fprintf(stderr, "   ctxt1=%d\n", ctxt1); */ \
377
0
/* #endif */ \
378
0
        opj_t1_setcurctx(curctx, ctxt1); \
379
0
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
380
0
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
381
0
        } else { \
382
0
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
383
0
        } \
384
0
        if (v) { \
385
0
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
386
0
                                *flagsp, \
387
0
                                flagsp[-1], flagsp[1], \
388
0
                                ci); \
389
0
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
390
0
            v = opj_smr_sign(*l_datap); \
391
0
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
392
0
                                              (OPJ_UINT32)bpno); \
393
0
/* #ifdef DEBUG_ENC_SIG */ \
394
0
/*            fprintf(stderr, "   ctxt2=%d\n", ctxt2); */ \
395
0
/* #endif */ \
396
0
            opj_t1_setcurctx(curctx, ctxt2); \
397
0
            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
398
0
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
399
0
            } else { \
400
0
                OPJ_UINT32 spb = opj_t1_getspb(lu); \
401
0
/* #ifdef DEBUG_ENC_SIG */ \
402
0
/*                fprintf(stderr, "   spb=%d\n", spb); */ \
403
0
/* #endif */ \
404
0
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
405
0
            } \
406
0
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
407
0
        } \
408
0
        *flagsp |= T1_PI_THIS << (ci * 3U); \
409
0
    } \
410
208k
}
411
412
static INLINE void opj_t1_dec_sigpass_step_raw(
413
    opj_t1_t *t1,
414
    opj_flag_t *flagsp,
415
    OPJ_INT32 *datap,
416
    OPJ_INT32 oneplushalf,
417
    OPJ_UINT32 vsc,
418
    OPJ_UINT32 ci)
419
0
{
420
0
    OPJ_UINT32 v;
421
0
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
422
423
0
    OPJ_UINT32 const flags = *flagsp;
424
425
0
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
426
0
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
427
0
        if (opj_mqc_raw_decode(mqc)) {
428
0
            v = opj_mqc_raw_decode(mqc);
429
0
            *datap = v ? -oneplushalf : oneplushalf;
430
0
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
431
0
        }
432
0
        *flagsp |= T1_PI_THIS << (ci * 3U);
433
0
    }
434
0
}
435
436
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
437
                                          data_stride, ci, mqc, curctx, \
438
0
                                          v, a, c, ct, oneplushalf, vsc) \
439
0
{ \
440
0
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
441
0
        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
442
0
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
443
0
        opj_t1_setcurctx(curctx, ctxt1); \
444
0
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
445
0
        if (v) { \
446
0
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
447
0
                                flags, \
448
0
                                flagsp[-1], flagsp[1], \
449
0
                                ci); \
450
0
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
451
0
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
452
0
            opj_t1_setcurctx(curctx, ctxt2); \
453
0
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
454
0
            v = v ^ spb; \
455
0
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
456
0
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
457
0
        } \
458
0
        flags |= T1_PI_THIS << (ci * 3U); \
459
0
    } \
460
0
}
461
462
static INLINE void opj_t1_dec_sigpass_step_mqc(
463
    opj_t1_t *t1,
464
    opj_flag_t *flagsp,
465
    OPJ_INT32 *datap,
466
    OPJ_INT32 oneplushalf,
467
    OPJ_UINT32 ci,
468
    OPJ_UINT32 flags_stride,
469
    OPJ_UINT32 vsc)
470
0
{
471
0
    OPJ_UINT32 v;
472
473
0
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
474
0
    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
475
0
                                      0, ci, mqc, mqc->curctx,
476
0
                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
477
0
}
478
479
static void opj_t1_enc_sigpass(opj_t1_t *t1,
480
                               OPJ_INT32 bpno,
481
                               OPJ_INT32 *nmsedec,
482
                               OPJ_BYTE type,
483
                               OPJ_UINT32 cblksty
484
                              )
485
7.78k
{
486
7.78k
    OPJ_UINT32 i, k;
487
7.78k
    OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
488
7.78k
    opj_flag_t* f = &T1_FLAGS(0, 0);
489
7.78k
    OPJ_UINT32 const extra = 2;
490
7.78k
    opj_mqc_t* mqc = &(t1->mqc);
491
7.78k
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
492
7.78k
    const OPJ_INT32* datap = t1->data;
493
494
7.78k
    *nmsedec = 0;
495
#ifdef DEBUG_ENC_SIG
496
    fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
497
#endif
498
14.8k
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
499
7.06k
        const OPJ_UINT32 w = t1->w;
500
#ifdef DEBUG_ENC_SIG
501
        fprintf(stderr, " k=%d\n", k);
502
#endif
503
45.8k
        for (i = 0; i < w; ++i, ++f, datap += 4) {
504
#ifdef DEBUG_ENC_SIG
505
            fprintf(stderr, " i=%d\n", i);
506
#endif
507
38.7k
            if (*f == 0U) {
508
                /* Nothing to do for any of the 4 data points */
509
0
                continue;
510
0
            }
511
38.7k
            opj_t1_enc_sigpass_step_macro(
512
38.7k
                mqc, curctx, a, c, ct,
513
38.7k
                f,
514
38.7k
                &datap[0],
515
38.7k
                bpno,
516
38.7k
                one,
517
38.7k
                nmsedec,
518
38.7k
                type,
519
38.7k
                0, cblksty & J2K_CCP_CBLKSTY_VSC);
520
38.7k
            opj_t1_enc_sigpass_step_macro(
521
38.7k
                mqc, curctx, a, c, ct,
522
38.7k
                f,
523
38.7k
                &datap[1],
524
38.7k
                bpno,
525
38.7k
                one,
526
38.7k
                nmsedec,
527
38.7k
                type,
528
38.7k
                1, 0);
529
38.7k
            opj_t1_enc_sigpass_step_macro(
530
38.7k
                mqc, curctx, a, c, ct,
531
38.7k
                f,
532
38.7k
                &datap[2],
533
38.7k
                bpno,
534
38.7k
                one,
535
38.7k
                nmsedec,
536
38.7k
                type,
537
38.7k
                2, 0);
538
38.7k
            opj_t1_enc_sigpass_step_macro(
539
38.7k
                mqc, curctx, a, c, ct,
540
38.7k
                f,
541
38.7k
                &datap[3],
542
38.7k
                bpno,
543
38.7k
                one,
544
38.7k
                nmsedec,
545
38.7k
                type,
546
38.7k
                3, 0);
547
38.7k
        }
548
7.06k
    }
549
550
7.78k
    if (k < t1->h) {
551
6.22k
        OPJ_UINT32 j;
552
#ifdef DEBUG_ENC_SIG
553
        fprintf(stderr, " k=%d\n", k);
554
#endif
555
36.3k
        for (i = 0; i < t1->w; ++i, ++f) {
556
#ifdef DEBUG_ENC_SIG
557
            fprintf(stderr, " i=%d\n", i);
558
#endif
559
30.1k
            if (*f == 0U) {
560
                /* Nothing to do for any of the 4 data points */
561
0
                datap += (t1->h - k);
562
0
                continue;
563
0
            }
564
83.2k
            for (j = k; j < t1->h; ++j, ++datap) {
565
53.1k
                opj_t1_enc_sigpass_step_macro(
566
53.1k
                    mqc, curctx, a, c, ct,
567
53.1k
                    f,
568
53.1k
                    &datap[0],
569
53.1k
                    bpno,
570
53.1k
                    one,
571
53.1k
                    nmsedec,
572
53.1k
                    type,
573
53.1k
                    j - k,
574
53.1k
                    (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
575
53.1k
            }
576
30.1k
        }
577
6.22k
    }
578
579
7.78k
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
580
7.78k
}
581
582
static void opj_t1_dec_sigpass_raw(
583
    opj_t1_t *t1,
584
    OPJ_INT32 bpno,
585
    OPJ_INT32 cblksty)
586
0
{
587
0
    OPJ_INT32 one, half, oneplushalf;
588
0
    OPJ_UINT32 i, j, k;
589
0
    OPJ_INT32 *data = t1->data;
590
0
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
591
0
    const OPJ_UINT32 l_w = t1->w;
592
0
    one = 1 << bpno;
593
0
    half = one >> 1;
594
0
    oneplushalf = one | half;
595
596
0
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
597
0
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
598
0
            opj_flag_t flags = *flagsp;
599
0
            if (flags != 0) {
600
0
                opj_t1_dec_sigpass_step_raw(
601
0
                    t1,
602
0
                    flagsp,
603
0
                    data,
604
0
                    oneplushalf,
605
0
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
606
0
                    0U);
607
0
                opj_t1_dec_sigpass_step_raw(
608
0
                    t1,
609
0
                    flagsp,
610
0
                    data + l_w,
611
0
                    oneplushalf,
612
0
                    OPJ_FALSE, /* vsc */
613
0
                    1U);
614
0
                opj_t1_dec_sigpass_step_raw(
615
0
                    t1,
616
0
                    flagsp,
617
0
                    data + 2 * l_w,
618
0
                    oneplushalf,
619
0
                    OPJ_FALSE, /* vsc */
620
0
                    2U);
621
0
                opj_t1_dec_sigpass_step_raw(
622
0
                    t1,
623
0
                    flagsp,
624
0
                    data + 3 * l_w,
625
0
                    oneplushalf,
626
0
                    OPJ_FALSE, /* vsc */
627
0
                    3U);
628
0
            }
629
0
        }
630
0
    }
631
0
    if (k < t1->h) {
632
0
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
633
0
            for (j = 0; j < t1->h - k; ++j) {
634
0
                opj_t1_dec_sigpass_step_raw(
635
0
                    t1,
636
0
                    flagsp,
637
0
                    data + j * l_w,
638
0
                    oneplushalf,
639
0
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
640
0
                    j);
641
0
            }
642
0
        }
643
0
    }
644
0
}
645
646
0
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
647
0
{ \
648
0
        OPJ_INT32 one, half, oneplushalf; \
649
0
        OPJ_UINT32 i, j, k; \
650
0
        register OPJ_INT32 *data = t1->data; \
651
0
        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
652
0
        const OPJ_UINT32 l_w = w; \
653
0
        opj_mqc_t* mqc = &(t1->mqc); \
654
0
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
655
0
        register OPJ_UINT32 v; \
656
0
        one = 1 << bpno; \
657
0
        half = one >> 1; \
658
0
        oneplushalf = one | half; \
659
0
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
660
0
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
661
0
                        opj_flag_t flags = *flagsp; \
662
0
                        if( flags != 0 ) { \
663
0
                            opj_t1_dec_sigpass_step_mqc_macro( \
664
0
                                flags, flagsp, flags_stride, data, \
665
0
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
666
0
                            opj_t1_dec_sigpass_step_mqc_macro( \
667
0
                                flags, flagsp, flags_stride, data, \
668
0
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
669
0
                            opj_t1_dec_sigpass_step_mqc_macro( \
670
0
                                flags, flagsp, flags_stride, data, \
671
0
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
672
0
                            opj_t1_dec_sigpass_step_mqc_macro( \
673
0
                                flags, flagsp, flags_stride, data, \
674
0
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
675
0
                            *flagsp = flags; \
676
0
                        } \
677
0
                } \
678
0
        } \
679
0
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
680
0
        if( k < h ) { \
681
0
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
682
0
                for (j = 0; j < h - k; ++j) { \
683
0
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
684
0
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
685
0
                } \
686
0
            } \
687
0
        } \
688
0
}
689
690
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
691
    opj_t1_t *t1,
692
    OPJ_INT32 bpno)
693
0
{
694
0
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
695
0
}
696
697
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
698
    opj_t1_t *t1,
699
    OPJ_INT32 bpno)
700
0
{
701
0
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
702
0
}
703
704
static void opj_t1_dec_sigpass_mqc_generic_novsc(
705
    opj_t1_t *t1,
706
    OPJ_INT32 bpno)
707
0
{
708
0
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
709
0
                                    t1->w + 2U);
710
0
}
711
712
static void opj_t1_dec_sigpass_mqc_generic_vsc(
713
    opj_t1_t *t1,
714
    OPJ_INT32 bpno)
715
0
{
716
0
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
717
0
                                    t1->w + 2U);
718
0
}
719
720
static void opj_t1_dec_sigpass_mqc(
721
    opj_t1_t *t1,
722
    OPJ_INT32 bpno,
723
    OPJ_INT32 cblksty)
724
0
{
725
0
    if (t1->w == 64 && t1->h == 64) {
726
0
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
727
0
            opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
728
0
        } else {
729
0
            opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
730
0
        }
731
0
    } else {
732
0
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
733
0
            opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
734
0
        } else {
735
0
            opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
736
0
        }
737
0
    }
738
0
}
739
740
/**
741
Encode refinement pass step
742
*/
743
208k
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
744
208k
{\
745
208k
    OPJ_UINT32 v; \
746
208k
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
747
208k
        const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
748
208k
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
749
208k
        OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
750
208k
        *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
751
208k
                                          (OPJ_UINT32)bpno); \
752
208k
        v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
753
208k
/* #ifdef DEBUG_ENC_REF */ \
754
208k
/*        fprintf(stderr, "  ctxt=%d\n", ctxt); */ \
755
208k
/* #endif */ \
756
208k
        opj_t1_setcurctx(curctx, ctxt); \
757
208k
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
758
0
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
759
208k
        } else { \
760
208k
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
761
208k
        } \
762
208k
        flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
763
208k
    } \
764
208k
}
765
766
767
static INLINE void opj_t1_dec_refpass_step_raw(
768
    opj_t1_t *t1,
769
    opj_flag_t *flagsp,
770
    OPJ_INT32 *datap,
771
    OPJ_INT32 poshalf,
772
    OPJ_UINT32 ci)
773
0
{
774
0
    OPJ_UINT32 v;
775
776
0
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
777
778
0
    if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
779
0
            (T1_SIGMA_THIS << (ci * 3U))) {
780
0
        v = opj_mqc_raw_decode(mqc);
781
0
        *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
782
0
        *flagsp |= T1_MU_THIS << (ci * 3U);
783
0
    }
784
0
}
785
786
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
787
0
                                          mqc, curctx, v, a, c, ct, poshalf) \
788
0
{ \
789
0
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
790
0
            (T1_SIGMA_THIS << (ci * 3U))) { \
791
0
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
792
0
        opj_t1_setcurctx(curctx, ctxt); \
793
0
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
794
0
        data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
795
0
        flags |= T1_MU_THIS << (ci * 3U); \
796
0
    } \
797
0
}
798
799
static INLINE void opj_t1_dec_refpass_step_mqc(
800
    opj_t1_t *t1,
801
    opj_flag_t *flagsp,
802
    OPJ_INT32 *datap,
803
    OPJ_INT32 poshalf,
804
    OPJ_UINT32 ci)
805
0
{
806
0
    OPJ_UINT32 v;
807
808
0
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
809
0
    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
810
0
                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
811
0
                                      mqc->ct, poshalf);
812
0
}
813
814
static void opj_t1_enc_refpass(
815
    opj_t1_t *t1,
816
    OPJ_INT32 bpno,
817
    OPJ_INT32 *nmsedec,
818
    OPJ_BYTE type)
819
7.78k
{
820
7.78k
    OPJ_UINT32 i, k;
821
7.78k
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
822
7.78k
    opj_flag_t* f = &T1_FLAGS(0, 0);
823
7.78k
    const OPJ_UINT32 extra = 2U;
824
7.78k
    opj_mqc_t* mqc = &(t1->mqc);
825
7.78k
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
826
7.78k
    const OPJ_INT32* datap = t1->data;
827
828
7.78k
    *nmsedec = 0;
829
#ifdef DEBUG_ENC_REF
830
    fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
831
#endif
832
14.8k
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
833
#ifdef DEBUG_ENC_REF
834
        fprintf(stderr, " k=%d\n", k);
835
#endif
836
45.8k
        for (i = 0; i < t1->w; ++i, f++, datap += 4) {
837
38.7k
            const OPJ_UINT32 flags = *f;
838
38.7k
            OPJ_UINT32 flagsUpdated = flags;
839
#ifdef DEBUG_ENC_REF
840
            fprintf(stderr, " i=%d\n", i);
841
#endif
842
38.7k
            if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
843
                /* none significant */
844
0
                continue;
845
0
            }
846
38.7k
            if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
847
38.7k
                    (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
848
                /* all processed by sigpass */
849
0
                continue;
850
0
            }
851
852
38.7k
            opj_t1_enc_refpass_step_macro(
853
38.7k
                mqc, curctx, a, c, ct,
854
38.7k
                flags, flagsUpdated,
855
38.7k
                &datap[0],
856
38.7k
                bpno,
857
38.7k
                one,
858
38.7k
                nmsedec,
859
38.7k
                type,
860
38.7k
                0);
861
38.7k
            opj_t1_enc_refpass_step_macro(
862
38.7k
                mqc, curctx, a, c, ct,
863
38.7k
                flags, flagsUpdated,
864
38.7k
                &datap[1],
865
38.7k
                bpno,
866
38.7k
                one,
867
38.7k
                nmsedec,
868
38.7k
                type,
869
38.7k
                1);
870
38.7k
            opj_t1_enc_refpass_step_macro(
871
38.7k
                mqc, curctx, a, c, ct,
872
38.7k
                flags, flagsUpdated,
873
38.7k
                &datap[2],
874
38.7k
                bpno,
875
38.7k
                one,
876
38.7k
                nmsedec,
877
38.7k
                type,
878
38.7k
                2);
879
38.7k
            opj_t1_enc_refpass_step_macro(
880
38.7k
                mqc, curctx, a, c, ct,
881
38.7k
                flags, flagsUpdated,
882
38.7k
                &datap[3],
883
38.7k
                bpno,
884
38.7k
                one,
885
38.7k
                nmsedec,
886
38.7k
                type,
887
38.7k
                3);
888
38.7k
            *f = flagsUpdated;
889
38.7k
        }
890
7.06k
    }
891
892
7.78k
    if (k < t1->h) {
893
6.22k
        OPJ_UINT32 j;
894
6.22k
        const OPJ_UINT32 remaining_lines = t1->h - k;
895
#ifdef DEBUG_ENC_REF
896
        fprintf(stderr, " k=%d\n", k);
897
#endif
898
36.3k
        for (i = 0; i < t1->w; ++i, ++f) {
899
#ifdef DEBUG_ENC_REF
900
            fprintf(stderr, " i=%d\n", i);
901
#endif
902
30.1k
            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
903
                /* none significant */
904
0
                datap += remaining_lines;
905
0
                continue;
906
0
            }
907
83.2k
            for (j = 0; j < remaining_lines; ++j, datap ++) {
908
53.1k
                opj_t1_enc_refpass_step_macro(
909
53.1k
                    mqc, curctx, a, c, ct,
910
53.1k
                    *f, *f,
911
53.1k
                    &datap[0],
912
53.1k
                    bpno,
913
53.1k
                    one,
914
53.1k
                    nmsedec,
915
53.1k
                    type,
916
53.1k
                    j);
917
53.1k
            }
918
30.1k
        }
919
6.22k
    }
920
921
7.78k
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
922
7.78k
}
923
924
925
static void opj_t1_dec_refpass_raw(
926
    opj_t1_t *t1,
927
    OPJ_INT32 bpno)
928
0
{
929
0
    OPJ_INT32 one, poshalf;
930
0
    OPJ_UINT32 i, j, k;
931
0
    OPJ_INT32 *data = t1->data;
932
0
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
933
0
    const OPJ_UINT32 l_w = t1->w;
934
0
    one = 1 << bpno;
935
0
    poshalf = one >> 1;
936
0
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
937
0
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
938
0
            opj_flag_t flags = *flagsp;
939
0
            if (flags != 0) {
940
0
                opj_t1_dec_refpass_step_raw(
941
0
                    t1,
942
0
                    flagsp,
943
0
                    data,
944
0
                    poshalf,
945
0
                    0U);
946
0
                opj_t1_dec_refpass_step_raw(
947
0
                    t1,
948
0
                    flagsp,
949
0
                    data + l_w,
950
0
                    poshalf,
951
0
                    1U);
952
0
                opj_t1_dec_refpass_step_raw(
953
0
                    t1,
954
0
                    flagsp,
955
0
                    data + 2 * l_w,
956
0
                    poshalf,
957
0
                    2U);
958
0
                opj_t1_dec_refpass_step_raw(
959
0
                    t1,
960
0
                    flagsp,
961
0
                    data + 3 * l_w,
962
0
                    poshalf,
963
0
                    3U);
964
0
            }
965
0
        }
966
0
    }
967
0
    if (k < t1->h) {
968
0
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
969
0
            for (j = 0; j < t1->h - k; ++j) {
970
0
                opj_t1_dec_refpass_step_raw(
971
0
                    t1,
972
0
                    flagsp,
973
0
                    data + j * l_w,
974
0
                    poshalf,
975
0
                    j);
976
0
            }
977
0
        }
978
0
    }
979
0
}
980
981
0
/**
 * Body of the MQ-coded refinement pass decoder, expanded once per
 * specialisation (compile-time 64x64 dimensions or run-time dimensions).
 *
 * t1            Tier-1 handle (samples, flags, MQ coder)
 * bpno          bit-plane being decoded
 * w, h          code-block width and height; expanded several times
 * flags_stride  number of flag words per flags row; the first column of the
 *               first stripe is at t1->flags[flags_stride + 1]
 *
 * Full stripes of four rows use opj_t1_dec_refpass_step_mqc_macro on a local
 * copy of the flags word, which is written back once per column, and on the
 * coder variables declared by DOWNLOAD_MQC_VARIABLES. Columns whose flags
 * word is 0 are skipped.
 *
 * UPLOAD_MQC_VARIABLES is issued before the last partial stripe because that
 * part goes through opj_t1_dec_refpass_step_mqc(), which reads and writes
 * t1->mqc directly rather than the local variables.
 */
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
982
0
{ \
983
0
        OPJ_INT32 one, poshalf; \
984
0
        OPJ_UINT32 i, j, k; \
985
0
        register OPJ_INT32 *data = t1->data; \
986
0
        register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
987
0
        const OPJ_UINT32 l_w = w; \
988
0
        opj_mqc_t* mqc = &(t1->mqc); \
989
0
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
990
0
        register OPJ_UINT32 v; \
991
0
        one = 1 << bpno; \
992
0
        poshalf = one >> 1; \
993
0
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
994
0
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
995
0
                        opj_flag_t flags = *flagsp; \
996
0
                        if( flags != 0 ) { \
997
0
                            opj_t1_dec_refpass_step_mqc_macro( \
998
0
                                flags, data, l_w, 0, \
999
0
                                mqc, curctx, v, a, c, ct, poshalf); \
1000
0
                            opj_t1_dec_refpass_step_mqc_macro( \
1001
0
                                flags, data, l_w, 1, \
1002
0
                                mqc, curctx, v, a, c, ct, poshalf); \
1003
0
                            opj_t1_dec_refpass_step_mqc_macro( \
1004
0
                                flags, data, l_w, 2, \
1005
0
                                mqc, curctx, v, a, c, ct, poshalf); \
1006
0
                            opj_t1_dec_refpass_step_mqc_macro( \
1007
0
                                flags, data, l_w, 3, \
1008
0
                                mqc, curctx, v, a, c, ct, poshalf); \
1009
0
                            *flagsp = flags; \
1010
0
                        } \
1011
0
                } \
1012
0
        } \
1013
0
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1014
0
        if( k < h ) { \
1015
0
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1016
0
                for (j = 0; j < h - k; ++j) { \
1017
0
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1018
0
                } \
1019
0
            } \
1020
0
        } \
1021
0
}
1022
1023
/**
 * MQ-coded refinement pass decoder specialised for 64x64 code-blocks:
 * width, height and flags stride (64 + 2) are passed as literal constants
 * to opj_t1_dec_refpass_mqc_internal.
 */
static void opj_t1_dec_refpass_mqc_64x64(
1024
    opj_t1_t *t1,
1025
    OPJ_INT32 bpno)
1026
0
{
1027
0
    opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1028
0
}
1029
1030
/**
 * MQ-coded refinement pass decoder for any code-block size: dimensions are
 * read from t1->w / t1->h and the flags stride is t1->w + 2.
 */
static void opj_t1_dec_refpass_mqc_generic(
1031
    opj_t1_t *t1,
1032
    OPJ_INT32 bpno)
1033
0
{
1034
0
    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1035
0
}
1036
1037
static void opj_t1_dec_refpass_mqc(
1038
    opj_t1_t *t1,
1039
    OPJ_INT32 bpno)
1040
0
{
1041
0
    if (t1->w == 64 && t1->h == 64) {
1042
0
        opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1043
0
    } else {
1044
0
        opj_t1_dec_refpass_mqc_generic(t1, bpno);
1045
0
    }
1046
0
}
1047
1048
/**
1049
Encode clean-up pass step
Codes the sub-samples runlen .. lim-1 of one column.

mqc, curctx, a, c, ct : MQ coder and its working variables
flagspIn  : flags word of the column (evaluated once)
datapIn   : first sample to code, i.e. the one at row runlen (evaluated once)
bpno, one : bit-plane number and the mask tested against the magnitude
nmsedec   : pointer to the distortion accumulator, incremented on each
            newly significant sample
agg       : non-zero when the column was run-length coded; the sample at
            ci == runlen then skips the zero-coding decision and goes
            straight to sign coding
runlen    : index of the first sub-sample to process
lim       : number of sub-samples in the column
cblksty   : code-block style; only J2K_CCP_CBLKSTY_VSC is tested

When every bit of `check` is set in the flags word nothing is coded and only
the T1_PI bits from runlen upwards are cleared. Otherwise T1_PI_THIS is
cleared for each visited sub-sample.

Besides its parameters the macro uses `t1` from the scope where it is
expanded (for the flags stride t1->w + 2).
1050
*/
1051
75.8k
#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
1052
75.8k
{ \
1053
75.8k
    OPJ_UINT32 v; \
1054
75.8k
    OPJ_UINT32 ci; \
1055
75.8k
    opj_flag_t* const flagsp = (flagspIn); \
1056
75.8k
    const OPJ_INT32* l_datap = (datapIn); \
1057
75.8k
    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
1058
75.8k
                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1059
75.8k
 \
1060
75.8k
    if ((*flagsp & check) == check) { \
1061
0
        if (runlen == 0) { \
1062
0
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1063
0
        } else if (runlen == 1) { \
1064
0
            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
1065
0
        } else if (runlen == 2) { \
1066
0
            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
1067
0
        } else if (runlen == 3) { \
1068
0
            *flagsp &= ~(T1_PI_3); \
1069
0
        } \
1070
0
    } \
1071
75.8k
    else \
1072
305k
    for (ci = runlen; ci < lim; ++ci) { \
1073
229k
        OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
1074
229k
        if ((agg != 0) && (ci == runlen)) { \
1075
513
            goto_PARTIAL = OPJ_TRUE; \
1076
513
        } \
1077
229k
        else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
1078
20.6k
            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
1079
20.6k
/* #ifdef DEBUG_ENC_CLN */ \
1080
20.6k
/*            printf("   ctxt1=%d\n", ctxt1); */ \
1081
20.6k
/* #endif */ \
1082
20.6k
            opj_t1_setcurctx(curctx, ctxt1); \
1083
20.6k
            v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
1084
20.6k
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
1085
20.6k
            if (v) { \
1086
20.6k
                goto_PARTIAL = OPJ_TRUE; \
1087
20.6k
            } \
1088
20.6k
        } \
1089
229k
        if( goto_PARTIAL ) { \
1090
21.1k
            OPJ_UINT32 vsc; \
1091
21.1k
            OPJ_UINT32 ctxt2, spb; \
1092
21.1k
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1093
21.1k
                        *flagsp, \
1094
21.1k
                        flagsp[-1], flagsp[1], \
1095
21.1k
                        ci); \
1096
21.1k
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
1097
21.1k
                                                (OPJ_UINT32)bpno); \
1098
21.1k
            ctxt2 = opj_t1_getctxno_sc(lu); \
1099
21.1k
/* #ifdef DEBUG_ENC_CLN */ \
1100
21.1k
/*           printf("   ctxt2=%d\n", ctxt2); */ \
1101
21.1k
/* #endif */ \
1102
21.1k
            opj_t1_setcurctx(curctx, ctxt2); \
1103
21.1k
 \
1104
21.1k
            v = opj_smr_sign(*l_datap); \
1105
21.1k
            spb = opj_t1_getspb(lu); \
1106
21.1k
/* #ifdef DEBUG_ENC_CLN */ \
1107
21.1k
/*           printf("   spb=%d\n", spb); */\
1108
21.1k
/* #endif */ \
1109
21.1k
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
1110
21.1k
            vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
1111
21.1k
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
1112
21.1k
        } \
1113
229k
        *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
1114
229k
        l_datap ++; \
1115
229k
    } \
1116
75.8k
}
1117
1118
/**
 * Decode one clean-up pass step for sub-sample `ci`.
 *
 * check_flags  when non-zero, the step is skipped if T1_SIGMA_THIS or
 *              T1_PI_THIS is set for `ci` in `flags`
 * partial      when non-zero, the zero-coding decision is not decoded and
 *              the sample goes straight to sign decoding
 * flags        local copy of the flags word (modifiable lvalue)
 * flagsp       address of that flags word; its neighbours flagsp[-1] and
 *              flagsp[1] are read and the update macro receives it as well
 * flags_stride stride handed to opj_t1_update_flags_macro
 * data, data_stride  the sample written is data[ci*data_stride]
 * mqc, curctx, v, a, c, ct  MQ decoder, its working variables and a scratch
 *              variable receiving each decoded decision
 * oneplushalf  magnitude given to a sample that becomes significant
 * vsc          forwarded to opj_t1_update_flags_macro
 *
 * When the zero-coding decision is 0 the `break` leaves the do/while(0)
 * wrapper, which exists only to make that early exit possible. Otherwise the
 * sign is decoded, XOR-ed with opj_t1_getspb(lu), the sample is set to
 * -oneplushalf or +oneplushalf accordingly and the flags are updated.
 */
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1119
                                      flags, flagsp, flags_stride, data, \
1120
                                      data_stride, ci, mqc, curctx, \
1121
0
                                      v, a, c, ct, oneplushalf, vsc) \
1122
0
{ \
1123
0
    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1124
0
        do { \
1125
0
            if( !partial ) { \
1126
0
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1127
0
                opj_t1_setcurctx(curctx, ctxt1); \
1128
0
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1129
0
                if( !v ) \
1130
0
                    break; \
1131
0
            } \
1132
0
            { \
1133
0
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1134
0
                                    flags, flagsp[-1], flagsp[1], \
1135
0
                                    ci); \
1136
0
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1137
0
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1138
0
                v = v ^ opj_t1_getspb(lu); \
1139
0
                data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1140
0
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1141
0
            } \
1142
0
        } while(0); \
1143
0
    } \
1144
0
}
1145
1146
/**
 * Function form of opj_t1_dec_clnpass_step_macro for a single sample.
 *
 * The step is expanded with check_flags = OPJ_TRUE and partial = OPJ_FALSE,
 * works directly on the coder state stored in t1->mqc and on the flags word
 * pointed to by flagsp, and uses a flags stride of t1->w + 2. A data stride
 * of 0 is passed, so the sample written is *datap whatever the value of ci.
 *
 * @param t1           Tier-1 handle providing the MQ coder and the width
 * @param flagsp       flags word of the column; updated in place
 * @param datap        sample to decode
 * @param oneplushalf  magnitude given to a sample that becomes significant
 * @param ci           index of the sub-sample inside the flags word
 * @param vsc          forwarded to the flags update
 */
static void opj_t1_dec_clnpass_step(
1147
    opj_t1_t *t1,
1148
    opj_flag_t *flagsp,
1149
    OPJ_INT32 *datap,
1150
    OPJ_INT32 oneplushalf,
1151
    OPJ_UINT32 ci,
1152
    OPJ_UINT32 vsc)
1153
0
{
1154
0
    OPJ_UINT32 v;
1155
1156
0
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1157
0
    opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1158
0
                                  *flagsp, flagsp, t1->w + 2U, datap,
1159
0
                                  0, ci, mqc, mqc->curctx,
1160
0
                                  v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1161
0
}
1162
1163
static void opj_t1_enc_clnpass(
1164
    opj_t1_t *t1,
1165
    OPJ_INT32 bpno,
1166
    OPJ_INT32 *nmsedec,
1167
    OPJ_UINT32 cblksty)
1168
8.56k
{
1169
8.56k
    OPJ_UINT32 i, k;
1170
8.56k
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1171
8.56k
    opj_mqc_t* mqc = &(t1->mqc);
1172
8.56k
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1173
8.56k
    const OPJ_INT32* datap = t1->data;
1174
8.56k
    opj_flag_t *f = &T1_FLAGS(0, 0);
1175
8.56k
    const OPJ_UINT32 extra = 2U;
1176
1177
8.56k
    *nmsedec = 0;
1178
#ifdef DEBUG_ENC_CLN
1179
    printf("enc_clnpass: bpno=%d\n", bpno);
1180
#endif
1181
16.3k
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1182
#ifdef DEBUG_ENC_CLN
1183
        printf(" k=%d\n", k);
1184
#endif
1185
50.4k
        for (i = 0; i < t1->w; ++i, f++) {
1186
42.6k
            OPJ_UINT32 agg, runlen;
1187
#ifdef DEBUG_ENC_CLN
1188
            printf("  i=%d\n", i);
1189
#endif
1190
42.6k
            agg = !*f;
1191
#ifdef DEBUG_ENC_CLN
1192
            printf("   agg=%d\n", agg);
1193
#endif
1194
42.6k
            if (agg) {
1195
513
                for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1196
513
                    if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1197
513
                        break;
1198
513
                    }
1199
513
                }
1200
513
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1201
513
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1202
513
                if (runlen == 4) {
1203
0
                    continue;
1204
0
                }
1205
513
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1206
513
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1207
513
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1208
42.1k
            } else {
1209
42.1k
                runlen = 0;
1210
42.1k
            }
1211
42.6k
            opj_t1_enc_clnpass_step_macro(
1212
42.6k
                mqc, curctx, a, c, ct,
1213
42.6k
                f,
1214
42.6k
                datap,
1215
42.6k
                bpno,
1216
42.6k
                one,
1217
42.6k
                nmsedec,
1218
42.6k
                agg,
1219
42.6k
                runlen,
1220
42.6k
                4U,
1221
42.6k
                cblksty);
1222
42.6k
            datap += 4 - runlen;
1223
42.6k
        }
1224
7.78k
    }
1225
8.56k
    if (k < t1->h) {
1226
6.84k
        const OPJ_UINT32 agg = 0;
1227
6.84k
        const OPJ_UINT32 runlen = 0;
1228
#ifdef DEBUG_ENC_CLN
1229
        printf(" k=%d\n", k);
1230
#endif
1231
40.0k
        for (i = 0; i < t1->w; ++i, f++) {
1232
#ifdef DEBUG_ENC_CLN
1233
            printf("  i=%d\n", i);
1234
            printf("   agg=%d\n", agg);
1235
#endif
1236
33.1k
            opj_t1_enc_clnpass_step_macro(
1237
33.1k
                mqc, curctx, a, c, ct,
1238
33.1k
                f,
1239
33.1k
                datap,
1240
33.1k
                bpno,
1241
33.1k
                one,
1242
33.1k
                nmsedec,
1243
33.1k
                agg,
1244
33.1k
                runlen,
1245
33.1k
                t1->h - k,
1246
33.1k
                cblksty);
1247
33.1k
            datap += t1->h - k;
1248
33.1k
        }
1249
6.84k
    }
1250
1251
8.56k
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1252
8.56k
}
1253
1254
0
/**
 * Body of the clean-up pass decoder, expanded once per specialisation
 * (64x64 or run-time dimensions, with or without vsc).
 *
 * t1            Tier-1 handle (samples, flags, MQ coder)
 * bpno          bit-plane being decoded
 * vsc           value forwarded to the flags update for sub-sample 0 only;
 *               the other sub-samples always receive OPJ_FALSE
 * w, h          code-block width and height; expanded several times
 * flags_stride  number of flag words per flags row
 *
 * For each column of a full stripe of four rows:
 *  - flags == 0: one T1_CTXNO_AGG decision is decoded. When it is 0 the
 *    column is left untouched (`continue`). Otherwise two T1_CTXNO_UNI
 *    decisions give runlen, and the switch enters at that sub-sample and
 *    falls through to the following ones. Only the entry sub-sample is
 *    decoded with partial set; `partial` is cleared before each fallthrough.
 *  - flags != 0: the four sub-samples are decoded with check_flags set.
 * The flags word is then stored with the four T1_PI bits cleared.
 *
 * UPLOAD_MQC_VARIABLES is issued before the last partial stripe because that
 * part goes through opj_t1_dec_clnpass_step(), which reads and writes
 * t1->mqc directly rather than the local variables.
 */
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1255
0
{ \
1256
0
    OPJ_INT32 one, half, oneplushalf; \
1257
0
    OPJ_UINT32 runlen; \
1258
0
    OPJ_UINT32 i, j, k; \
1259
0
    const OPJ_UINT32 l_w = w; \
1260
0
    opj_mqc_t* mqc = &(t1->mqc); \
1261
0
    register OPJ_INT32 *data = t1->data; \
1262
0
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1263
0
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1264
0
    register OPJ_UINT32 v; \
1265
0
    one = 1 << bpno; \
1266
0
    half = one >> 1; \
1267
0
    oneplushalf = one | half; \
1268
0
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1269
0
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1270
0
            opj_flag_t flags = *flagsp; \
1271
0
            if (flags == 0) { \
1272
0
                OPJ_UINT32 partial = OPJ_TRUE; \
1273
0
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1274
0
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1275
0
                if (!v) { \
1276
0
                    continue; \
1277
0
                } \
1278
0
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1279
0
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1280
0
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1281
0
                runlen = (runlen << 1) | v; \
1282
0
                switch(runlen) { \
1283
0
                    case 0: \
1284
0
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1285
0
                                            flags, flagsp, flags_stride, data, \
1286
0
                                            l_w, 0, mqc, curctx, \
1287
0
                                            v, a, c, ct, oneplushalf, vsc); \
1288
0
                        partial = OPJ_FALSE; \
1289
0
                        /* FALLTHRU */ \
1290
0
                    case 1: \
1291
0
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1292
0
                                            flags, flagsp, flags_stride, data, \
1293
0
                                            l_w, 1, mqc, curctx, \
1294
0
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1295
0
                        partial = OPJ_FALSE; \
1296
0
                        /* FALLTHRU */ \
1297
0
                    case 2: \
1298
0
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1299
0
                                            flags, flagsp, flags_stride, data, \
1300
0
                                            l_w, 2, mqc, curctx, \
1301
0
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1302
0
                        partial = OPJ_FALSE; \
1303
0
                        /* FALLTHRU */ \
1304
0
                    case 3: \
1305
0
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1306
0
                                            flags, flagsp, flags_stride, data, \
1307
0
                                            l_w, 3, mqc, curctx, \
1308
0
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1309
0
                        break; \
1310
0
                } \
1311
0
            } else { \
1312
0
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1313
0
                                    flags, flagsp, flags_stride, data, \
1314
0
                                    l_w, 0, mqc, curctx, \
1315
0
                                    v, a, c, ct, oneplushalf, vsc); \
1316
0
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1317
0
                                    flags, flagsp, flags_stride, data, \
1318
0
                                    l_w, 1, mqc, curctx, \
1319
0
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1320
0
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1321
0
                                    flags, flagsp, flags_stride, data, \
1322
0
                                    l_w, 2, mqc, curctx, \
1323
0
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1324
0
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1325
0
                                    flags, flagsp, flags_stride, data, \
1326
0
                                    l_w, 3, mqc, curctx, \
1327
0
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1328
0
            } \
1329
0
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1330
0
        } \
1331
0
    } \
1332
0
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1333
0
    if( k < h ) { \
1334
0
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1335
0
            for (j = 0; j < h - k; ++j) { \
1336
0
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1337
0
            } \
1338
0
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1339
0
        } \
1340
0
    } \
1341
0
}
1342
1343
/**
 * Consume the segmentation symbol that follows a clean-up pass.
 *
 * Nothing is done unless J2K_CCP_CBLKSTY_SEGSYM is set in cblksty. Otherwise
 * four decisions are decoded with the T1_CTXNO_UNI context and assembled,
 * most significant bit first, into a 4-bit value. That value is currently
 * not verified: the warning that used to be emitted when it differed from
 * 0xa is disabled, so the decoding only serves to keep the MQ decoder in
 * step with the code stream.
 */
static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
{
    opj_mqc_t* mqc;
    OPJ_UINT32 v, v2;
    OPJ_UINT32 remaining;

    if (!(cblksty & J2K_CCP_CBLKSTY_SEGSYM)) {
        return;
    }

    mqc = &(t1->mqc);
    opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);

    v = 0;
    for (remaining = 4; remaining != 0; --remaining) {
        opj_mqc_decode(v2, mqc);
        v = (v << 1) | v2;
    }
}
1363
1364
/**
 * Clean-up pass decoder specialised for 64x64 code-blocks, vsc disabled:
 * expands opj_t1_dec_clnpass_internal with literal dimensions and a flags
 * stride of 66 (64 + 2).
 */
static void opj_t1_dec_clnpass_64x64_novsc(
1365
    opj_t1_t *t1,
1366
    OPJ_INT32 bpno)
1367
0
{
1368
0
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1369
0
}
1370
1371
/**
 * Clean-up pass decoder specialised for 64x64 code-blocks, vsc enabled:
 * expands opj_t1_dec_clnpass_internal with literal dimensions and a flags
 * stride of 66 (64 + 2).
 */
static void opj_t1_dec_clnpass_64x64_vsc(
1372
    opj_t1_t *t1,
1373
    OPJ_INT32 bpno)
1374
0
{
1375
0
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1376
0
}
1377
1378
/**
 * Clean-up pass decoder for any code-block size, vsc disabled: dimensions
 * are read from t1->w / t1->h and the flags stride is t1->w + 2.
 */
static void opj_t1_dec_clnpass_generic_novsc(
1379
    opj_t1_t *t1,
1380
    OPJ_INT32 bpno)
1381
0
{
1382
0
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1383
0
                                t1->w + 2U);
1384
0
}
1385
1386
/**
 * Clean-up pass decoder for any code-block size, vsc enabled: dimensions
 * are read from t1->w / t1->h and the flags stride is t1->w + 2.
 */
static void opj_t1_dec_clnpass_generic_vsc(
1387
    opj_t1_t *t1,
1388
    OPJ_INT32 bpno)
1389
0
{
1390
0
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1391
0
                                t1->w + 2U);
1392
0
}
1393
1394
static void opj_t1_dec_clnpass(
1395
    opj_t1_t *t1,
1396
    OPJ_INT32 bpno,
1397
    OPJ_INT32 cblksty)
1398
0
{
1399
0
    if (t1->w == 64 && t1->h == 64) {
1400
0
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1401
0
            opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1402
0
        } else {
1403
0
            opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1404
0
        }
1405
0
    } else {
1406
0
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1407
0
            opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1408
0
        } else {
1409
0
            opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1410
0
        }
1411
0
    }
1412
0
    opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1413
0
}
1414
1415
1416
static OPJ_FLOAT64 opj_t1_getwmsedec(
1417
    OPJ_INT32 nmsedec,
1418
    OPJ_UINT32 compno,
1419
    OPJ_UINT32 level,
1420
    OPJ_UINT32 orient,
1421
    OPJ_INT32 bpno,
1422
    OPJ_UINT32 qmfbid,
1423
    OPJ_FLOAT64 stepsize,
1424
    OPJ_UINT32 numcomps,
1425
    const OPJ_FLOAT64 * mct_norms,
1426
    OPJ_UINT32 mct_numcomps)
1427
24.1k
{
1428
24.1k
    OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1429
24.1k
    OPJ_ARG_NOT_USED(numcomps);
1430
1431
24.1k
    if (mct_norms && (compno < mct_numcomps)) {
1432
0
        w1 = mct_norms[compno];
1433
0
    }
1434
1435
24.1k
    if (qmfbid == 1) {
1436
6.93k
        w2 = opj_dwt_getnorm(level, orient);
1437
17.2k
    } else {    /* if (qmfbid == 0) */
1438
17.2k
        const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1439
17.2k
                                    (orient == 3) ? 2 : 1;
1440
17.2k
        w2 = opj_dwt_getnorm_real(level, orient);
1441
        /* Not sure this is right. But preserves past behaviour */
1442
17.2k
        stepsize /= (1 << log2_gain);
1443
17.2k
    }
1444
1445
24.1k
    wmsedec = w1 * w2 * stepsize * (1 << bpno);
1446
24.1k
    wmsedec *= wmsedec * nmsedec / 8192.0;
1447
1448
24.1k
    return wmsedec;
1449
24.1k
}
1450
1451
static OPJ_BOOL opj_t1_allocate_buffers(
1452
    opj_t1_t *t1,
1453
    OPJ_UINT32 w,
1454
    OPJ_UINT32 h)
1455
17.3k
{
1456
17.3k
    OPJ_UINT32 flagssize;
1457
17.3k
    OPJ_UINT32 flags_stride;
1458
1459
    /* No risk of overflow. Prior checks ensure those assert are met */
1460
    /* They are per the specification */
1461
17.3k
    assert(w <= 1024);
1462
17.3k
    assert(h <= 1024);
1463
17.3k
    assert(w * h <= 4096);
1464
1465
    /* encoder uses tile buffer, so no need to allocate */
1466
17.3k
    {
1467
17.3k
        OPJ_UINT32 datasize = w * h;
1468
1469
17.3k
        if (datasize > t1->datasize) {
1470
1.60k
            opj_aligned_free(t1->data);
1471
1.60k
            t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1472
1.60k
            if (!t1->data) {
1473
                /* FIXME event manager error callback */
1474
0
                return OPJ_FALSE;
1475
0
            }
1476
1.60k
            t1->datasize = datasize;
1477
1.60k
        }
1478
        /* memset first arg is declared to never be null by gcc */
1479
17.3k
        if (t1->data != NULL) {
1480
17.3k
            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1481
17.3k
        }
1482
17.3k
    }
1483
1484
0
    flags_stride = w + 2U; /* can't be 0U */
1485
1486
17.3k
    flagssize = (h + 3U) / 4U + 2U;
1487
1488
17.3k
    flagssize *= flags_stride;
1489
17.3k
    {
1490
17.3k
        opj_flag_t* p;
1491
17.3k
        OPJ_UINT32 x;
1492
17.3k
        OPJ_UINT32 flags_height = (h + 3U) / 4U;
1493
1494
17.3k
        if (flagssize > t1->flagssize) {
1495
1496
6.76k
            opj_aligned_free(t1->flags);
1497
6.76k
            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1498
6.76k
                            opj_flag_t));
1499
6.76k
            if (!t1->flags) {
1500
                /* FIXME event manager error callback */
1501
0
                return OPJ_FALSE;
1502
0
            }
1503
6.76k
        }
1504
17.3k
        t1->flagssize = flagssize;
1505
1506
17.3k
        memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1507
1508
17.3k
        p = &t1->flags[0];
1509
517k
        for (x = 0; x < flags_stride; ++x) {
1510
            /* magic value to hopefully stop any passes being interested in this entry */
1511
500k
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1512
500k
        }
1513
1514
17.3k
        p = &t1->flags[((flags_height + 1) * flags_stride)];
1515
517k
        for (x = 0; x < flags_stride; ++x) {
1516
            /* magic value to hopefully stop any passes being interested in this entry */
1517
500k
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1518
500k
        }
1519
1520
17.3k
        if (h % 4) {
1521
9.53k
            OPJ_UINT32 v = 0;
1522
9.53k
            p = &t1->flags[((flags_height) * flags_stride)];
1523
9.53k
            if (h % 4 == 1) {
1524
3.69k
                v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1525
5.83k
            } else if (h % 4 == 2) {
1526
3.00k
                v |= T1_PI_2 | T1_PI_3;
1527
3.00k
            } else if (h % 4 == 3) {
1528
2.82k
                v |= T1_PI_3;
1529
2.82k
            }
1530
235k
            for (x = 0; x < flags_stride; ++x) {
1531
226k
                *p++ = v;
1532
226k
            }
1533
9.53k
        }
1534
17.3k
    }
1535
1536
0
    t1->w = w;
1537
17.3k
    t1->h = h;
1538
1539
17.3k
    return OPJ_TRUE;
1540
17.3k
}
1541
1542
/* ----------------------------------------------------------------------- */
1543
1544
/* ----------------------------------------------------------------------- */
1545
/**
1546
 * Creates a new Tier 1 handle
1547
 * and initializes the look-up tables of the Tier-1 coder/decoder
1548
 * @return a new T1 handle if successful, returns NULL otherwise
1549
*/
1550
opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1551
260
{
1552
260
    opj_t1_t *l_t1 = 00;
1553
1554
260
    l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1555
260
    if (!l_t1) {
1556
0
        return 00;
1557
0
    }
1558
1559
260
    l_t1->encoder = isEncoder;
1560
1561
260
    return l_t1;
1562
260
}
1563
1564
1565
/**
1566
 * Destroys a previously created T1 handle
1567
 *
1568
 * @param p_t1 Tier 1 handle to destroy
1569
*/
1570
void opj_t1_destroy(opj_t1_t *p_t1)
1571
260
{
1572
260
    if (! p_t1) {
1573
0
        return;
1574
0
    }
1575
1576
260
    if (p_t1->data) {
1577
260
        opj_aligned_free(p_t1->data);
1578
260
        p_t1->data = 00;
1579
260
    }
1580
1581
260
    if (p_t1->flags) {
1582
260
        opj_aligned_free(p_t1->flags);
1583
260
        p_t1->flags = 00;
1584
260
    }
1585
1586
260
    opj_free(p_t1->cblkdatabuffer);
1587
1588
260
    opj_free(p_t1);
1589
260
}
1590
1591
/**
 * Description of one code-block decoding job, handed as user data to
 * opj_t1_clbl_decode_processor().
 */
typedef struct {
1592
    /* when false, the processor allocates cblk->decoded_data for the result */
    OPJ_BOOL whole_tile_decoding;
1593
    OPJ_UINT32 resno;
1594
    /* code-block to decode */
    opj_tcd_cblk_dec_t* cblk;
1595
    opj_tcd_band_t* band;
1596
    opj_tcd_tilecomp_t* tilec;
1597
    opj_tccp_t* tccp;
1598
    /* NOTE(review): presumably forces use of the handle's cblkdatabuffer; confirm in the processor */
    OPJ_BOOL mustuse_cblkdatabuffer;
1599
    /* shared result flag; the processor sets it to OPJ_FALSE on failure */
    volatile OPJ_BOOL* pret;
1600
    /* event manager receiving the error messages */
    opj_event_mgr_t *p_manager;
1601
    /* when non-NULL, locked around each use of p_manager */
    opj_mutex_t* p_manager_mutex;
1602
    /* NOTE(review): presumably enables a check of the predictable termination; confirm */
    OPJ_BOOL check_pterm;
1603
} opj_t1_cblk_decode_processing_job_t;
1604
1605
static void opj_t1_destroy_wrapper(void* t1)
1606
260
{
1607
260
    opj_t1_destroy((opj_t1_t*) t1);
1608
260
}
1609
1610
/**
 * Thread-pool job that decodes a single code-block and stores the result.
 *
 * The decoded coefficients are post-processed (ROI shift, then either a
 * division by 2 when tccp->qmfbid == 1 or a multiplication by half the band
 * step size otherwise) and written either in place into cblk->decoded_data
 * (when job->whole_tile_decoding is false) or into tilec->data.
 *
 * On any failure *(job->pret) is set to OPJ_FALSE. If *(job->pret) is already
 * false when the job runs, no decoding is attempted. The job structure is
 * always released with opj_free() before returning.
 *
 * @param user_data Pointer to a opj_t1_cblk_decode_processing_job_t structure
 * @param tls       TLS handle; a Tier 1 handle is fetched from it under
 *                  OPJ_TLS_KEY_T1, and created and registered on first use.
 */
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
{
    opj_tcd_cblk_dec_t* cblk;
    opj_tcd_band_t* band;
    opj_tcd_tilecomp_t* tilec;
    opj_tccp_t* tccp;
    OPJ_INT32* OPJ_RESTRICT datap;
    OPJ_UINT32 cblk_w, cblk_h;
    OPJ_INT32 x, y;
    OPJ_UINT32 i, j;
    opj_t1_cblk_decode_processing_job_t* job;
    opj_t1_t* t1;
    OPJ_UINT32 resno;
    OPJ_UINT32 tile_w;

    job = (opj_t1_cblk_decode_processing_job_t*) user_data;

    cblk = job->cblk;

    if (!job->whole_tile_decoding) {
        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);

        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
                             cblk_w * cblk_h);
        if (cblk->decoded_data == NULL) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate cblk->decoded_data\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        /* Zero-init required */
        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
    } else if (cblk->decoded_data) {
        /* Not sure if that code path can happen, but better be */
        /* safe than sorry */
        opj_aligned_free(cblk->decoded_data);
        cblk->decoded_data = NULL;
    }

    resno = job->resno;
    band = job->band;
    tilec = job->tilec;
    tccp = job->tccp;
    /* Row stride of tilec->data: width of the highest decoded resolution */
    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
                          -
                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);

    /* Another job already failed: skip the decoding work */
    if (!*(job->pret)) {
        opj_free(job);
        return;
    }

    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
    if (t1 == NULL) {
        t1 = opj_t1_create(OPJ_FALSE);
        if (t1 == NULL) {
            /* Serialize access to the event manager, as done for every */
            /* other message emitted from a decoding job */
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate Tier 1 handle\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Unable to set t1 handle as TLS\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            /* The handle was not registered, so it must be released here */
            opj_t1_destroy(t1);
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }
    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;

    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    } else {
        if (OPJ_FALSE == opj_t1_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }

    /* Position of the code-block relative to its band; bands with bit 0 */
    /* (resp. bit 1) set in bandno are shifted by the width (resp. height) */
    /* of the previous resolution */
    x = cblk->x0 - band->x0;
    y = cblk->y0 - band->y0;
    if (band->bandno & 1) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        x += pres->x1 - pres->x0;
    }
    if (band->bandno & 2) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        y += pres->y1 - pres->y0;
    }

    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
    cblk_w = t1->w;
    cblk_h = t1->h;

    if (tccp->roishift) {
        if (tccp->roishift >= 31) {
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    datap[(j * cblk_w) + i] = 0;
                }
            }
        } else {
            OPJ_INT32 thresh = 1 << tccp->roishift;
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    OPJ_INT32 val = datap[(j * cblk_w) + i];
                    OPJ_INT32 mag = abs(val);
                    if (mag >= thresh) {
                        mag >>= tccp->roishift;
                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
                    }
                }
            }
        }
    }

    /* Both can be non NULL if for example decoding a full tile and then */
    /* partially a tile. In which case partial decoding should be the */
    /* priority */
    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));

    if (cblk->decoded_data) {
        /* In-place post-processing of the code-block's own buffer */
        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
        if (tccp->qmfbid == 1) {
            for (i = 0; i < cblk_size; ++i) {
                datap[i] /= 2;
            }
        } else {        /* if (tccp->qmfbid == 0) */
            const float stepsize = 0.5f * band->stepsize;
            i = 0;
#ifdef __SSE2__
            {
                const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
                for (; i < (cblk_size & ~15U); i += 16) {
                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 0)));
                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 4)));
                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 8)));
                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 12)));
                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
                    datap += 16;
                }
            }
#endif
            /* Scalar tail: the float result overwrites the integer slot */
            for (; i < cblk_size; ++i) {
                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
                memcpy(datap, &tmp, sizeof(tmp));
                datap++;
            }
        }
    } else if (tccp->qmfbid == 1) {
        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
                                                       (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            /* positive -> round down aka.  (83)/2 =  41.5 ->  41 */
            /* negative -> round up   aka. (-83)/2 = -41.5 -> -41 */
#if defined(__AVX512F__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 16; ++i) {
                __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
                const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
                in_avx = _mm512_add_epi32(in_avx, add_avx);
                _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
                ptr_in += 16;
                ptr_out += 16;
            }

            for (i = 0; i < cblk_w % 16; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#elif defined(__AVX2__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 8; ++i) {
                __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
                const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
                in_avx = _mm256_add_epi32(in_avx, add_avx);
                _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
                ptr_in += 8;
                ptr_out += 8;
            }

            for (i = 0; i < cblk_w % 8; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#else
            i = 0;
            for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
            }
            for (; i < cblk_w; ++i) {
                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
            }
#endif
        }
    } else {        /* if (tccp->qmfbid == 0) */
        const float stepsize = 0.5f * band->stepsize;
        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
                                                         tile_w + (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
            for (i = 0; i < cblk_w; ++i) {
                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
                *tiledp2 = tmp;
                datap++;
                tiledp2++;
            }
            tiledp += tile_w;
        }
    }

    opj_free(job);
}
1870
1871
1872
void opj_t1_decode_cblks(opj_tcd_t* tcd,
1873
                         volatile OPJ_BOOL* pret,
1874
                         opj_tcd_tilecomp_t* tilec,
1875
                         opj_tccp_t* tccp,
1876
                         opj_event_mgr_t *p_manager,
1877
                         opj_mutex_t* p_manager_mutex,
1878
                         OPJ_BOOL check_pterm
1879
                        )
1880
0
{
1881
0
    opj_thread_pool_t* tp = tcd->thread_pool;
1882
0
    OPJ_UINT32 resno, bandno, precno, cblkno;
1883
1884
#ifdef DEBUG_VERBOSE
1885
    OPJ_UINT32 codeblocks_decoded = 0;
1886
    printf("Enter opj_t1_decode_cblks()\n");
1887
#endif
1888
1889
0
    for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1890
0
        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1891
1892
0
        for (bandno = 0; bandno < res->numbands; ++bandno) {
1893
0
            opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1894
1895
0
            for (precno = 0; precno < res->pw * res->ph; ++precno) {
1896
0
                opj_tcd_precinct_t* precinct = &band->precincts[precno];
1897
1898
0
                if (!opj_tcd_is_subband_area_of_interest(tcd,
1899
0
                        tilec->compno,
1900
0
                        resno,
1901
0
                        band->bandno,
1902
0
                        (OPJ_UINT32)precinct->x0,
1903
0
                        (OPJ_UINT32)precinct->y0,
1904
0
                        (OPJ_UINT32)precinct->x1,
1905
0
                        (OPJ_UINT32)precinct->y1)) {
1906
0
                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1907
0
                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1908
0
                        if (cblk->decoded_data) {
1909
#ifdef DEBUG_VERBOSE
1910
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1911
                                   cblk->x0, cblk->y0, resno, bandno);
1912
#endif
1913
0
                            opj_aligned_free(cblk->decoded_data);
1914
0
                            cblk->decoded_data = NULL;
1915
0
                        }
1916
0
                    }
1917
0
                    continue;
1918
0
                }
1919
1920
0
                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1921
0
                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1922
0
                    opj_t1_cblk_decode_processing_job_t* job;
1923
1924
0
                    if (!opj_tcd_is_subband_area_of_interest(tcd,
1925
0
                            tilec->compno,
1926
0
                            resno,
1927
0
                            band->bandno,
1928
0
                            (OPJ_UINT32)cblk->x0,
1929
0
                            (OPJ_UINT32)cblk->y0,
1930
0
                            (OPJ_UINT32)cblk->x1,
1931
0
                            (OPJ_UINT32)cblk->y1)) {
1932
0
                        if (cblk->decoded_data) {
1933
#ifdef DEBUG_VERBOSE
1934
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1935
                                   cblk->x0, cblk->y0, resno, bandno);
1936
#endif
1937
0
                            opj_aligned_free(cblk->decoded_data);
1938
0
                            cblk->decoded_data = NULL;
1939
0
                        }
1940
0
                        continue;
1941
0
                    }
1942
1943
0
                    if (!tcd->whole_tile_decoding) {
1944
0
                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1945
0
                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1946
0
                        if (cblk->decoded_data != NULL) {
1947
#ifdef DEBUG_VERBOSE
1948
                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1949
                                   cblk->x0, cblk->y0, resno, bandno);
1950
#endif
1951
0
                            continue;
1952
0
                        }
1953
0
                        if (cblk_w == 0 || cblk_h == 0) {
1954
0
                            continue;
1955
0
                        }
1956
#ifdef DEBUG_VERBOSE
1957
                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1958
                               cblk->x0, cblk->y0, resno, bandno);
1959
#endif
1960
0
                    }
1961
1962
0
                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1963
0
                            sizeof(opj_t1_cblk_decode_processing_job_t));
1964
0
                    if (!job) {
1965
0
                        *pret = OPJ_FALSE;
1966
0
                        return;
1967
0
                    }
1968
0
                    job->whole_tile_decoding = tcd->whole_tile_decoding;
1969
0
                    job->resno = resno;
1970
0
                    job->cblk = cblk;
1971
0
                    job->band = band;
1972
0
                    job->tilec = tilec;
1973
0
                    job->tccp = tccp;
1974
0
                    job->pret = pret;
1975
0
                    job->p_manager_mutex = p_manager_mutex;
1976
0
                    job->p_manager = p_manager;
1977
0
                    job->check_pterm = check_pterm;
1978
0
                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1979
0
                    opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1980
#ifdef DEBUG_VERBOSE
1981
                    codeblocks_decoded ++;
1982
#endif
1983
0
                    if (!(*pret)) {
1984
0
                        return;
1985
0
                    }
1986
0
                } /* cblkno */
1987
0
            } /* precno */
1988
0
        } /* bandno */
1989
0
    } /* resno */
1990
1991
#ifdef DEBUG_VERBOSE
1992
    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1993
#endif
1994
0
    return;
1995
0
}
1996
1997
1998
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1999
                                   opj_tcd_cblk_dec_t* cblk,
2000
                                   OPJ_UINT32 orient,
2001
                                   OPJ_UINT32 roishift,
2002
                                   OPJ_UINT32 cblksty,
2003
                                   opj_event_mgr_t *p_manager,
2004
                                   opj_mutex_t* p_manager_mutex,
2005
                                   OPJ_BOOL check_pterm)
2006
0
{
2007
0
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2008
2009
0
    OPJ_INT32 bpno_plus_one;
2010
0
    OPJ_UINT32 passtype;
2011
0
    OPJ_UINT32 segno, passno;
2012
0
    OPJ_BYTE* cblkdata = NULL;
2013
0
    OPJ_UINT32 cblkdataindex = 0;
2014
0
    OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
2015
0
    OPJ_INT32* original_t1_data = NULL;
2016
2017
0
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2018
2019
0
    if (!opj_t1_allocate_buffers(
2020
0
                t1,
2021
0
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2022
0
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2023
0
        return OPJ_FALSE;
2024
0
    }
2025
2026
0
    bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
2027
0
    if (bpno_plus_one >= 31) {
2028
0
        if (p_manager_mutex) {
2029
0
            opj_mutex_lock(p_manager_mutex);
2030
0
        }
2031
0
        opj_event_msg(p_manager, EVT_WARNING,
2032
0
                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
2033
0
                      bpno_plus_one);
2034
0
        if (p_manager_mutex) {
2035
0
            opj_mutex_unlock(p_manager_mutex);
2036
0
        }
2037
0
        return OPJ_FALSE;
2038
0
    }
2039
0
    passtype = 2;
2040
2041
0
    opj_mqc_resetstates(mqc);
2042
0
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2043
0
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2044
0
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2045
2046
0
    if (cblk->corrupted) {
2047
0
        assert(cblk->numchunks == 0);
2048
0
        return OPJ_TRUE;
2049
0
    }
2050
2051
    /* Even if we have a single chunk, in multi-threaded decoding */
2052
    /* the insertion of our synthetic marker might potentially override */
2053
    /* valid codestream of other codeblocks decoded in parallel. */
2054
0
    if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
2055
0
                                cblk->numchunks > 0)) {
2056
0
        OPJ_UINT32 i;
2057
0
        OPJ_UINT32 cblk_len;
2058
2059
        /* Compute whole codeblock length from chunk lengths */
2060
0
        cblk_len = 0;
2061
0
        for (i = 0; i < cblk->numchunks; i++) {
2062
0
            cblk_len += cblk->chunks[i].len;
2063
0
        }
2064
2065
        /* Allocate temporary memory if needed */
2066
0
        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
2067
0
            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
2068
0
                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
2069
0
            if (cblkdata == NULL) {
2070
0
                return OPJ_FALSE;
2071
0
            }
2072
0
            t1->cblkdatabuffer = cblkdata;
2073
0
            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2074
0
            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2075
0
        }
2076
2077
        /* Concatenate all chunks */
2078
0
        cblkdata = t1->cblkdatabuffer;
2079
0
        cblk_len = 0;
2080
0
        for (i = 0; i < cblk->numchunks; i++) {
2081
0
            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2082
0
            cblk_len += cblk->chunks[i].len;
2083
0
        }
2084
0
    } else if (cblk->numchunks == 1) {
2085
0
        cblkdata = cblk->chunks[0].data;
2086
0
    } else {
2087
        /* Not sure if that can happen in practice, but avoid Coverity to */
2088
        /* think we will dereference a null cblkdta pointer */
2089
0
        return OPJ_TRUE;
2090
0
    }
2091
2092
    /* For subtile decoding, directly decode in the decoded_data buffer of */
2093
    /* the code-block. Hack t1->data to point to it, and restore it later */
2094
0
    if (cblk->decoded_data) {
2095
0
        original_t1_data = t1->data;
2096
0
        t1->data = cblk->decoded_data;
2097
0
    }
2098
2099
0
    for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2100
0
        opj_tcd_seg_t *seg = &cblk->segs[segno];
2101
2102
        /* BYPASS mode */
2103
0
        type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2104
0
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2105
2106
0
        if (type == T1_TYPE_RAW) {
2107
0
            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2108
0
                                 OPJ_COMMON_CBLK_DATA_EXTRA);
2109
0
        } else {
2110
0
            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2111
0
                             OPJ_COMMON_CBLK_DATA_EXTRA);
2112
0
        }
2113
0
        cblkdataindex += seg->len;
2114
2115
0
        for (passno = 0; (passno < seg->real_num_passes) &&
2116
0
                (bpno_plus_one >= 1); ++passno) {
2117
0
            switch (passtype) {
2118
0
            case 0:
2119
0
                if (type == T1_TYPE_RAW) {
2120
0
                    opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2121
0
                } else {
2122
0
                    opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2123
0
                }
2124
0
                break;
2125
0
            case 1:
2126
0
                if (type == T1_TYPE_RAW) {
2127
0
                    opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2128
0
                } else {
2129
0
                    opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2130
0
                }
2131
0
                break;
2132
0
            case 2:
2133
0
                opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2134
0
                break;
2135
0
            }
2136
2137
0
            if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2138
0
                opj_mqc_resetstates(mqc);
2139
0
                opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2140
0
                opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2141
0
                opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2142
0
            }
2143
0
            if (++passtype == 3) {
2144
0
                passtype = 0;
2145
0
                bpno_plus_one--;
2146
0
            }
2147
0
        }
2148
2149
0
        opq_mqc_finish_dec(mqc);
2150
0
    }
2151
2152
0
    if (check_pterm) {
2153
0
        if (mqc->bp + 2 < mqc->end) {
2154
0
            if (p_manager_mutex) {
2155
0
                opj_mutex_lock(p_manager_mutex);
2156
0
            }
2157
0
            opj_event_msg(p_manager, EVT_WARNING,
2158
0
                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2159
0
                          (int)(mqc->end - mqc->bp) - 2,
2160
0
                          (int)(mqc->bp - mqc->start),
2161
0
                          (int)(mqc->end - mqc->start));
2162
0
            if (p_manager_mutex) {
2163
0
                opj_mutex_unlock(p_manager_mutex);
2164
0
            }
2165
0
        } else if (mqc->end_of_byte_stream_counter > 2) {
2166
0
            if (p_manager_mutex) {
2167
0
                opj_mutex_lock(p_manager_mutex);
2168
0
            }
2169
0
            opj_event_msg(p_manager, EVT_WARNING,
2170
0
                          "PTERM check failure: %d synthesized 0xFF markers read\n",
2171
0
                          mqc->end_of_byte_stream_counter);
2172
0
            if (p_manager_mutex) {
2173
0
                opj_mutex_unlock(p_manager_mutex);
2174
0
            }
2175
0
        }
2176
0
    }
2177
2178
    /* Restore original t1->data is needed */
2179
0
    if (cblk->decoded_data) {
2180
0
        t1->data = original_t1_data;
2181
0
    }
2182
2183
0
    return OPJ_TRUE;
2184
0
}
2185
2186
2187
/* Work item describing one code-block to encode; consumed and freed by */
/* opj_t1_cblk_encode_processor(). */
typedef struct {
2188
    /* Component index -- NOTE(review): use not visible in this chunk */
    OPJ_UINT32 compno;
2189
    /* Resolution index; resolutions[resno - 1] is read to offset x / y for */
    /* bands with bandno bit 0 / bit 1 set */
    OPJ_UINT32 resno;
2190
    /* Code-block to encode */
    opj_tcd_cblk_enc_t* cblk;
2191
    /* Tile being encoded -- NOTE(review): use not visible in this chunk */
    opj_tcd_tile_t *tile;
2192
    /* Band containing the code-block (origin and bandno are read) */
    opj_tcd_band_t* band;
2193
    /* Tile component providing the sample data and resolutions */
    opj_tcd_tilecomp_t* tilec;
2194
    /* Coding parameters (qmfbid is read) */
    opj_tccp_t* tccp;
2195
    /* presumably MCT norms, one per component -- TODO confirm */
    const OPJ_FLOAT64 * mct_norms;
2196
    /* presumably the element count of mct_norms -- TODO confirm */
    OPJ_UINT32 mct_numcomps;
2197
    /* Shared status flag: checked before encoding, set to OPJ_FALSE on */
    /* failure */
    volatile OPJ_BOOL* pret;
2198
    /* NOTE(review): protected resource not visible in this chunk */
    opj_mutex_t* mutex;
2199
} opj_t1_cblk_encode_processing_job_t;
2200
2201
/** Procedure to deal with an asynchronous code-block encoding job.
2202
 *
2203
 * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2204
 * @param tls       TLS handle.
2205
 */
2206
static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
2207
17.3k
{
2208
17.3k
    opj_t1_cblk_encode_processing_job_t* job =
2209
17.3k
        (opj_t1_cblk_encode_processing_job_t*)user_data;
2210
17.3k
    opj_tcd_cblk_enc_t* cblk = job->cblk;
2211
17.3k
    const opj_tcd_band_t* band = job->band;
2212
17.3k
    const opj_tcd_tilecomp_t* tilec = job->tilec;
2213
17.3k
    const opj_tccp_t* tccp = job->tccp;
2214
17.3k
    const OPJ_UINT32 resno = job->resno;
2215
17.3k
    opj_t1_t* t1;
2216
17.3k
    const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2217
2218
17.3k
    OPJ_INT32* OPJ_RESTRICT tiledp;
2219
17.3k
    OPJ_UINT32 cblk_w;
2220
17.3k
    OPJ_UINT32 cblk_h;
2221
17.3k
    OPJ_UINT32 i, j;
2222
2223
17.3k
    OPJ_INT32 x = cblk->x0 - band->x0;
2224
17.3k
    OPJ_INT32 y = cblk->y0 - band->y0;
2225
2226
17.3k
    if (!*(job->pret)) {
2227
0
        opj_free(job);
2228
0
        return;
2229
0
    }
2230
2231
17.3k
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2232
17.3k
    if (t1 == NULL) {
2233
260
        t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2234
260
        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2235
260
    }
2236
2237
17.3k
    if (band->bandno & 1) {
2238
11.0k
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2239
11.0k
        x += pres->x1 - pres->x0;
2240
11.0k
    }
2241
17.3k
    if (band->bandno & 2) {
2242
10.9k
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2243
10.9k
        y += pres->y1 - pres->y0;
2244
10.9k
    }
2245
2246
17.3k
    if (!opj_t1_allocate_buffers(
2247
17.3k
                t1,
2248
17.3k
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2249
17.3k
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2250
0
        *(job->pret) = OPJ_FALSE;
2251
0
        opj_free(job);
2252
0
        return;
2253
0
    }
2254
2255
17.3k
    cblk_w = t1->w;
2256
17.3k
    cblk_h = t1->h;
2257
2258
17.3k
    tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2259
2260
17.3k
    if (tccp->qmfbid == 1) {
2261
        /* Do multiplication on unsigned type, even if the
2262
            * underlying type is signed, to avoid potential
2263
            * int overflow on large value (the output will be
2264
            * incorrect in such situation, but whatever...)
2265
            * This assumes complement-to-2 signed integer
2266
            * representation
2267
            * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2268
            */
2269
7.08k
        OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2270
7.08k
        OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2271
        /* Change from "natural" order to "zigzag" order of T1 passes */
2272
54.9k
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2273
#if defined(__AVX512F__)
2274
            const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
2275
            const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
2276
            OPJ_UINT32* ptr = tiledp_u;
2277
            for (i = 0; i < cblk_w / 16; ++i) {
2278
                //                      INPUT                                        OUTPUT
2279
                // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F   00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
2280
                // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F   04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
2281
                // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F   08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
2282
                // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F   0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
2283
                __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2284
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2285
                __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2286
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2287
                __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2288
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2289
                __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2290
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2291
2292
                __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
2293
                __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
2294
                __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
2295
                __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
2296
2297
                in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
2298
                in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
2299
                in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
2300
                in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
2301
2302
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
2303
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
2304
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
2305
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
2306
2307
                tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
2308
                tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
2309
2310
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
2311
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
2312
                _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
2313
                                    0x1));
2314
                _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
2315
                                    0x1));
2316
                _mm256_storeu_si256((__m256i*)(t1data + 48),
2317
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
2318
                _mm256_storeu_si256((__m256i*)(t1data + 56),
2319
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
2320
                t1data += 64;
2321
                ptr += 16;
2322
            }
2323
            for (i = 0; i < cblk_w % 16; ++i) {
2324
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2325
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2326
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2327
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2328
                t1data += 4;
2329
                ptr += 1;
2330
            }
2331
#elif defined(__AVX2__)
2332
            OPJ_UINT32* ptr = tiledp_u;
2333
            for (i = 0; i < cblk_w / 8; ++i) {
2334
                //          INPUT                  OUTPUT
2335
                // 00 01 02 03 04 05 06 07   00 10 20 30 01 11 21 31
2336
                // 10 11 12 13 14 15 16 17   02 12 22 32 03 13 23 33
2337
                // 20 21 22 23 24 25 26 27   04 14 24 34 05 15 25 35
2338
                // 30 31 32 33 34 35 36 37   06 16 26 36 07 17 27 37
2339
                __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2340
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2341
                __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2342
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2343
                __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2344
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2345
                __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2346
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2347
2348
                __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
2349
                __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
2350
                __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
2351
                __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
2352
2353
                in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
2354
                in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
2355
                in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
2356
                in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
2357
2358
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
2359
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
2360
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
2361
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
2362
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
2363
                                    0x31));
2364
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
2365
                                    0x31));
2366
                t1data += 32;
2367
                ptr += 8;
2368
            }
2369
            for (i = 0; i < cblk_w % 8; ++i) {
2370
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2371
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2372
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2373
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2374
                t1data += 4;
2375
                ptr += 1;
2376
            }
2377
#else
2378
1.94M
            for (i = 0; i < cblk_w; ++i) {
2379
1.89M
                t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2380
1.89M
                t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2381
1.89M
                t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2382
1.89M
                t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2383
1.89M
                t1data += 4;
2384
1.89M
            }
2385
47.9k
#endif
2386
47.9k
        }
2387
7.08k
        if (j < cblk_h) {
2388
96.9k
            for (i = 0; i < cblk_w; ++i) {
2389
92.9k
                OPJ_UINT32 k;
2390
275k
                for (k = j; k < cblk_h; k++) {
2391
182k
                    t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2392
182k
                    t1data ++;
2393
182k
                }
2394
92.9k
            }
2395
3.92k
        }
2396
10.2k
    } else {        /* if (tccp->qmfbid == 0) */
2397
10.2k
        OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2398
10.2k
        OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2399
        /* Change from "natural" order to "zigzag" order of T1 passes */
2400
76.5k
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2401
2.50M
            for (i = 0; i < cblk_w; ++i) {
2402
2.44M
                t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2403
2.44M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2404
2.44M
                t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2405
2.44M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2406
2.44M
                t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2407
2.44M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2408
2.44M
                t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2409
2.44M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2410
2.44M
                t1data += 4;
2411
2.44M
            }
2412
66.2k
        }
2413
10.2k
        if (j < cblk_h) {
2414
119k
            for (i = 0; i < cblk_w; ++i) {
2415
114k
                OPJ_UINT32 k;
2416
332k
                for (k = j; k < cblk_h; k++) {
2417
217k
                    t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2418
217k
                                                      * (1 << T1_NMSEDEC_FRACBITS));
2419
217k
                    t1data ++;
2420
217k
                }
2421
114k
            }
2422
5.60k
        }
2423
10.2k
    }
2424
2425
17.3k
    {
2426
17.3k
        OPJ_FLOAT64 cumwmsedec =
2427
17.3k
            opj_t1_encode_cblk(
2428
17.3k
                t1,
2429
17.3k
                cblk,
2430
17.3k
                band->bandno,
2431
17.3k
                job->compno,
2432
17.3k
                tilec->numresolutions - 1 - resno,
2433
17.3k
                tccp->qmfbid,
2434
17.3k
                band->stepsize,
2435
17.3k
                tccp->cblksty,
2436
17.3k
                job->tile->numcomps,
2437
17.3k
                job->mct_norms,
2438
17.3k
                job->mct_numcomps);
2439
17.3k
        if (job->mutex) {
2440
17.3k
            opj_mutex_lock(job->mutex);
2441
17.3k
        }
2442
17.3k
        job->tile->distotile += cumwmsedec;
2443
17.3k
        if (job->mutex) {
2444
17.3k
            opj_mutex_unlock(job->mutex);
2445
17.3k
        }
2446
17.3k
    }
2447
2448
17.3k
    opj_free(job);
2449
17.3k
}
2450
2451
2452
OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2453
                             opj_tcd_tile_t *tile,
2454
                             opj_tcp_t *tcp,
2455
                             const OPJ_FLOAT64 * mct_norms,
2456
                             OPJ_UINT32 mct_numcomps
2457
                            )
2458
260
{
2459
260
    volatile OPJ_BOOL ret = OPJ_TRUE;
2460
260
    opj_thread_pool_t* tp = tcd->thread_pool;
2461
260
    OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2462
260
    opj_mutex_t* mutex = opj_mutex_create();
2463
2464
260
    tile->distotile = 0;
2465
2466
1.04k
    for (compno = 0; compno < tile->numcomps; ++compno) {
2467
780
        opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2468
780
        opj_tccp_t* tccp = &tcp->tccps[compno];
2469
2470
5.46k
        for (resno = 0; resno < tilec->numresolutions; ++resno) {
2471
4.68k
            opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2472
2473
17.1k
            for (bandno = 0; bandno < res->numbands; ++bandno) {
2474
12.4k
                opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2475
2476
                /* Skip empty bands */
2477
12.4k
                if (opj_tcd_is_band_empty(band)) {
2478
0
                    continue;
2479
0
                }
2480
24.9k
                for (precno = 0; precno < res->pw * res->ph; ++precno) {
2481
12.4k
                    opj_tcd_precinct_t *prc = &band->precincts[precno];
2482
2483
29.8k
                    for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2484
17.3k
                        opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2485
2486
17.3k
                        opj_t1_cblk_encode_processing_job_t* job =
2487
17.3k
                            (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2488
17.3k
                                    sizeof(opj_t1_cblk_encode_processing_job_t));
2489
17.3k
                        if (!job) {
2490
0
                            ret = OPJ_FALSE;
2491
0
                            goto end;
2492
0
                        }
2493
17.3k
                        job->compno = compno;
2494
17.3k
                        job->tile = tile;
2495
17.3k
                        job->resno = resno;
2496
17.3k
                        job->cblk = cblk;
2497
17.3k
                        job->band = band;
2498
17.3k
                        job->tilec = tilec;
2499
17.3k
                        job->tccp = tccp;
2500
17.3k
                        job->mct_norms = mct_norms;
2501
17.3k
                        job->mct_numcomps = mct_numcomps;
2502
17.3k
                        job->pret = &ret;
2503
17.3k
                        job->mutex = mutex;
2504
17.3k
                        opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
2505
2506
17.3k
                    } /* cblkno */
2507
12.4k
                } /* precno */
2508
12.4k
            } /* bandno */
2509
4.68k
        } /* resno  */
2510
780
    } /* compno  */
2511
2512
260
end:
2513
260
    opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2514
260
    if (mutex) {
2515
260
        opj_mutex_destroy(mutex);
2516
260
    }
2517
2518
260
    return ret;
2519
260
}
2520
2521
/* Returns whether the pass (bpno, passtype) is terminated */
2522
static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2523
                                   OPJ_UINT32 cblksty,
2524
                                   OPJ_INT32 bpno,
2525
                                   OPJ_UINT32 passtype)
2526
24.1k
{
2527
    /* Is it the last cleanup pass ? */
2528
24.1k
    if (passtype == 2 && bpno == 0) {
2529
780
        return OPJ_TRUE;
2530
780
    }
2531
2532
23.3k
    if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2533
0
        return OPJ_TRUE;
2534
0
    }
2535
2536
23.3k
    if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2537
        /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2538
0
        if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2539
0
            return OPJ_TRUE;
2540
0
        }
2541
        /* and beyond terminate all the magnitude refinement passes (in raw) */
2542
        /* and cleanup passes (in MQC) */
2543
0
        if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2544
0
            return OPJ_TRUE;
2545
0
        }
2546
0
    }
2547
2548
23.3k
    return OPJ_FALSE;
2549
23.3k
}
2550
2551
2552
static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2553
                                      opj_tcd_cblk_enc_t* cblk,
2554
                                      OPJ_UINT32 orient,
2555
                                      OPJ_UINT32 compno,
2556
                                      OPJ_UINT32 level,
2557
                                      OPJ_UINT32 qmfbid,
2558
                                      OPJ_FLOAT64 stepsize,
2559
                                      OPJ_UINT32 cblksty,
2560
                                      OPJ_UINT32 numcomps,
2561
                                      const OPJ_FLOAT64 * mct_norms,
2562
                                      OPJ_UINT32 mct_numcomps)
2563
17.3k
{
2564
17.3k
    OPJ_FLOAT64 cumwmsedec = 0.0;
2565
2566
17.3k
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2567
2568
17.3k
    OPJ_UINT32 passno;
2569
17.3k
    OPJ_INT32 bpno;
2570
17.3k
    OPJ_UINT32 passtype;
2571
17.3k
    OPJ_INT32 nmsedec = 0;
2572
17.3k
    OPJ_INT32 max;
2573
17.3k
    OPJ_UINT32 i, j;
2574
17.3k
    OPJ_BYTE type = T1_TYPE_MQ;
2575
17.3k
    OPJ_FLOAT64 tempwmsedec;
2576
17.3k
    OPJ_INT32* datap;
2577
2578
#ifdef EXTRA_DEBUG
2579
    printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2580
           cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2581
#endif
2582
2583
17.3k
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2584
2585
17.3k
    max = 0;
2586
17.3k
    datap = t1->data;
2587
492k
    for (j = 0; j < t1->h; ++j) {
2588
474k
        const OPJ_UINT32 w = t1->w;
2589
18.2M
        for (i = 0; i < w; ++i, ++datap) {
2590
17.7M
            OPJ_INT32 tmp = *datap;
2591
17.7M
            if (tmp < 0) {
2592
21.1k
                OPJ_UINT32 tmp_unsigned;
2593
21.1k
                if (tmp == INT_MIN) {
2594
                    /* To avoid undefined behaviour when negating INT_MIN */
2595
                    /* but if we go here, it means we have supplied an input */
2596
                    /* with more bit depth than we we can really support. */
2597
                    /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
2598
0
                    tmp = INT_MIN + 1;
2599
0
                }
2600
21.1k
                max = opj_int_max(max, -tmp);
2601
21.1k
                tmp_unsigned = opj_to_smr(tmp);
2602
21.1k
                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2603
17.7M
            } else {
2604
17.7M
                max = opj_int_max(max, tmp);
2605
17.7M
            }
2606
17.7M
        }
2607
474k
    }
2608
2609
17.3k
    cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2610
16.5k
                                      T1_NMSEDEC_FRACBITS) : 0;
2611
17.3k
    if (cblk->numbps == 0) {
2612
16.5k
        cblk->totalpasses = 0;
2613
16.5k
        return cumwmsedec;
2614
16.5k
    }
2615
2616
780
    bpno = (OPJ_INT32)(cblk->numbps - 1);
2617
780
    passtype = 2;
2618
2619
780
    opj_mqc_resetstates(mqc);
2620
780
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2621
780
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2622
780
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2623
780
    opj_mqc_init_enc(mqc, cblk->data);
2624
2625
24.9k
    for (passno = 0; bpno >= 0; ++passno) {
2626
24.1k
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2627
24.1k
        type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2628
24.1k
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2629
2630
        /* If the previous pass was terminating, we need to reset the encoder */
2631
24.1k
        if (passno > 0 && cblk->passes[passno - 1].term) {
2632
0
            if (type == T1_TYPE_RAW) {
2633
0
                opj_mqc_bypass_init_enc(mqc);
2634
0
            } else {
2635
0
                opj_mqc_restart_init_enc(mqc);
2636
0
            }
2637
0
        }
2638
2639
24.1k
        switch (passtype) {
2640
7.78k
        case 0:
2641
7.78k
            opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2642
7.78k
            break;
2643
7.78k
        case 1:
2644
7.78k
            opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2645
7.78k
            break;
2646
8.56k
        case 2:
2647
8.56k
            opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2648
            /* code switch SEGMARK (i.e. SEGSYM) */
2649
8.56k
            if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2650
0
                opj_mqc_segmark_enc(mqc);
2651
0
            }
2652
8.56k
            break;
2653
24.1k
        }
2654
2655
24.1k
        tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2656
24.1k
                                        stepsize, numcomps, mct_norms, mct_numcomps) ;
2657
24.1k
        cumwmsedec += tempwmsedec;
2658
24.1k
        pass->distortiondec = cumwmsedec;
2659
2660
24.1k
        if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2661
            /* If it is a terminated pass, terminate it */
2662
780
            if (type == T1_TYPE_RAW) {
2663
0
                opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2664
780
            } else {
2665
780
                if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2666
0
                    opj_mqc_erterm_enc(mqc);
2667
780
                } else {
2668
780
                    opj_mqc_flush(mqc);
2669
780
                }
2670
780
            }
2671
780
            pass->term = 1;
2672
780
            pass->rate = opj_mqc_numbytes(mqc);
2673
23.3k
        } else {
2674
            /* Non terminated pass */
2675
23.3k
            OPJ_UINT32 rate_extra_bytes;
2676
23.3k
            if (type == T1_TYPE_RAW) {
2677
0
                rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2678
0
                                       mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2679
23.3k
            } else {
2680
23.3k
                rate_extra_bytes = 3;
2681
23.3k
            }
2682
23.3k
            pass->term = 0;
2683
23.3k
            pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2684
23.3k
        }
2685
2686
24.1k
        if (++passtype == 3) {
2687
8.56k
            passtype = 0;
2688
8.56k
            bpno--;
2689
8.56k
        }
2690
2691
        /* Code-switch "RESET" */
2692
24.1k
        if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2693
0
            opj_mqc_reset_enc(mqc);
2694
0
        }
2695
24.1k
    }
2696
2697
780
    cblk->totalpasses = passno;
2698
2699
780
    if (cblk->totalpasses) {
2700
        /* Make sure that pass rates are increasing */
2701
780
        OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2702
24.9k
        for (passno = cblk->totalpasses; passno > 0;) {
2703
24.1k
            opj_tcd_pass_t *pass = &cblk->passes[--passno];
2704
24.1k
            if (pass->rate > last_pass_rate) {
2705
3.95k
                pass->rate = last_pass_rate;
2706
20.1k
            } else {
2707
20.1k
                last_pass_rate = pass->rate;
2708
20.1k
            }
2709
24.1k
        }
2710
780
    }
2711
2712
24.9k
    for (passno = 0; passno < cblk->totalpasses; passno++) {
2713
24.1k
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2714
2715
        /* Prevent generation of FF as last data byte of a pass*/
2716
        /* For terminating passes, the flushing procedure ensured this already */
2717
24.1k
        assert(pass->rate > 0);
2718
24.1k
        if (cblk->data[pass->rate - 1] == 0xFF) {
2719
315
            pass->rate--;
2720
315
        }
2721
24.1k
        pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2722
24.1k
    }
2723
2724
#ifdef EXTRA_DEBUG
2725
    printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2726
2727
    /* Check that there not 0xff >=0x90 sequences */
2728
    if (cblk->totalpasses) {
2729
        OPJ_UINT32 i;
2730
        OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2731
        for (i = 1; i < len; ++i) {
2732
            if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2733
                printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2734
                abort();
2735
            }
2736
        }
2737
    }
2738
#endif
2739
2740
780
    return cumwmsedec;
2741
780
}