Coverage Report

Created: 2026-03-31 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openjpeg/src/lib/openjp2/t1.c
Line
Count
Source
1
/*
2
 * The copyright in this software is being made available under the 2-clauses
3
 * BSD License, included below. This software may be subject to other third
4
 * party and contributor rights, including patent rights, and no such rights
5
 * are granted under this license.
6
 *
7
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8
 * Copyright (c) 2002-2014, Professor Benoit Macq
9
 * Copyright (c) 2001-2003, David Janssens
10
 * Copyright (c) 2002-2003, Yannick Verschueren
11
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
12
 * Copyright (c) 2003-2014, Antonin Descampe
13
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
14
 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15
 * Copyright (c) 2012, Carl Hetherington
16
 * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17
 * All rights reserved.
18
 *
19
 * Redistribution and use in source and binary forms, with or without
20
 * modification, are permitted provided that the following conditions
21
 * are met:
22
 * 1. Redistributions of source code must retain the above copyright
23
 *    notice, this list of conditions and the following disclaimer.
24
 * 2. Redistributions in binary form must reproduce the above copyright
25
 *    notice, this list of conditions and the following disclaimer in the
26
 *    documentation and/or other materials provided with the distribution.
27
 *
28
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
 * POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
#define OPJ_SKIP_POISON
42
#include "opj_includes.h"
43
44
#ifdef __SSE__
45
#include <xmmintrin.h>
46
#endif
47
#ifdef __SSE2__
48
#include <emmintrin.h>
49
#endif
50
#if (defined(__AVX2__) || defined(__AVX512F__))
51
#include <immintrin.h>
52
#endif
53
54
#if defined(__GNUC__)
55
#pragma GCC poison malloc calloc realloc free
56
#endif
57
58
#include "t1_luts.h"
59
60
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
61
/*@{*/
62
63
25.3M
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w+2)]) /* flag word of column x, row group y/4; uses the in-scope t1; arguments parenthesised so expressions index correctly */
64
65
13.7G
#define opj_t1_setcurctx(curctx, ctxno)  ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]) /* point curctx at context ctxno of the in-scope mqc; parenthesised so the expansion is a single expression */
66
67
/* Macros for 32-bit integers kept in sign-magnitude form: bit 31 is the sign
68
 * and bits 0-30 the absolute value (smr = signed magnitude representation) */
69
17.4G
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
70
532M
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
71
304M
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : (((OPJ_UINT32)0 - (OPJ_UINT32)(x)) | 0x80000000U)) /* negates in unsigned arithmetic: no signed overflow for the most negative value; x is evaluated twice */
72
73
74
/** @name Local static functions */
75
/*@{*/
76
77
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
78
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
79
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
80
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
81
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
82
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
83
                                       OPJ_UINT32 vsc);
84
85
86
/**
87
Decode significance propagation pass steps (RAW and MQC variants)
88
*/
89
90
static INLINE void opj_t1_dec_sigpass_step_raw(
91
    opj_t1_t *t1,
92
    opj_flag_t *flagsp,
93
    OPJ_INT32 *datap,
94
    OPJ_INT32 oneplushalf,
95
    OPJ_UINT32 vsc,
96
    OPJ_UINT32 row);
97
static INLINE void opj_t1_dec_sigpass_step_mqc(
98
    opj_t1_t *t1,
99
    opj_flag_t *flagsp,
100
    OPJ_INT32 *datap,
101
    OPJ_INT32 oneplushalf,
102
    OPJ_UINT32 row,
103
    OPJ_UINT32 flags_stride,
104
    OPJ_UINT32 vsc);
105
106
/**
107
Encode significance propagation pass
108
*/
109
static void opj_t1_enc_sigpass(opj_t1_t *t1,
110
                               OPJ_INT32 bpno,
111
                               OPJ_INT32 *nmsedec,
112
                               OPJ_BYTE type,
113
                               OPJ_UINT32 cblksty);
114
115
/**
116
Decode significance propagation pass (RAW variant)
117
*/
118
static void opj_t1_dec_sigpass_raw(
119
    opj_t1_t *t1,
120
    OPJ_INT32 bpno,
121
    OPJ_INT32 cblksty);
122
123
/**
124
Encode refinement pass
125
*/
126
static void opj_t1_enc_refpass(opj_t1_t *t1,
127
                               OPJ_INT32 bpno,
128
                               OPJ_INT32 *nmsedec,
129
                               OPJ_BYTE type);
130
131
/**
132
Decode refinement pass (RAW variant)
133
*/
134
static void opj_t1_dec_refpass_raw(
135
    opj_t1_t *t1,
136
    OPJ_INT32 bpno);
137
138
139
/**
140
Decode refinement pass steps (RAW and MQC variants)
141
*/
142
143
static INLINE void  opj_t1_dec_refpass_step_raw(
144
    opj_t1_t *t1,
145
    opj_flag_t *flagsp,
146
    OPJ_INT32 *datap,
147
    OPJ_INT32 poshalf,
148
    OPJ_UINT32 row);
149
static INLINE void opj_t1_dec_refpass_step_mqc(
150
    opj_t1_t *t1,
151
    opj_flag_t *flagsp,
152
    OPJ_INT32 *datap,
153
    OPJ_INT32 poshalf,
154
    OPJ_UINT32 row);
155
156
157
/**
158
Decode clean-up pass step
159
*/
160
161
static void opj_t1_dec_clnpass_step(
162
    opj_t1_t *t1,
163
    opj_flag_t *flagsp,
164
    OPJ_INT32 *datap,
165
    OPJ_INT32 oneplushalf,
166
    OPJ_UINT32 row,
167
    OPJ_UINT32 vsc);
168
169
/**
170
Encode clean-up pass
171
*/
172
static void opj_t1_enc_clnpass(
173
    opj_t1_t *t1,
174
    OPJ_INT32 bpno,
175
    OPJ_INT32 *nmsedec,
176
    OPJ_UINT32 cblksty);
177
178
static OPJ_FLOAT64 opj_t1_getwmsedec(
179
    OPJ_INT32 nmsedec,
180
    OPJ_UINT32 compno,
181
    OPJ_UINT32 level,
182
    OPJ_UINT32 orient,
183
    OPJ_INT32 bpno,
184
    OPJ_UINT32 qmfbid,
185
    OPJ_FLOAT64 stepsize,
186
    OPJ_UINT32 numcomps,
187
    const OPJ_FLOAT64 * mct_norms,
188
    OPJ_UINT32 mct_numcomps);
189
190
/** Return "cumwmsedec" that should be used to increase tile->distotile */
191
static double opj_t1_encode_cblk(opj_t1_t *t1,
192
                                 opj_tcd_cblk_enc_t* cblk,
193
                                 OPJ_UINT32 orient,
194
                                 OPJ_UINT32 compno,
195
                                 OPJ_UINT32 level,
196
                                 OPJ_UINT32 qmfbid,
197
                                 OPJ_FLOAT64 stepsize,
198
                                 OPJ_UINT32 cblksty,
199
                                 OPJ_UINT32 numcomps,
200
                                 const OPJ_FLOAT64 * mct_norms,
201
                                 OPJ_UINT32 mct_numcomps);
202
203
/**
204
Decode 1 code-block
205
@param t1 T1 handle
206
@param cblk Code-block coding parameters
207
@param orient Sub-band orientation of the code-block
208
@param roishift Region of interest shifting value
209
@param cblksty Code-block style
210
@param p_manager the event manager
211
@param p_manager_mutex mutex for the event manager
212
@param check_pterm whether PTERM correct termination should be checked
213
*/
214
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
215
                                   opj_tcd_cblk_dec_t* cblk,
216
                                   OPJ_UINT32 orient,
217
                                   OPJ_UINT32 roishift,
218
                                   OPJ_UINT32 cblksty,
219
                                   opj_event_mgr_t *p_manager,
220
                                   opj_mutex_t* p_manager_mutex,
221
                                   OPJ_BOOL check_pterm);
222
223
/**
224
Decode 1 HT code-block
225
@param t1 T1 handle
226
@param cblk Code-block coding parameters
227
@param orient Sub-band orientation of the code-block
228
@param roishift Region of interest shifting value
229
@param cblksty Code-block style
230
@param p_manager the event manager
231
@param p_manager_mutex mutex for the event manager
232
@param check_pterm whether PTERM correct termination should be checked
233
*/
234
OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
235
                               opj_tcd_cblk_dec_t* cblk,
236
                               OPJ_UINT32 orient,
237
                               OPJ_UINT32 roishift,
238
                               OPJ_UINT32 cblksty,
239
                               opj_event_mgr_t *p_manager,
240
                               opj_mutex_t* p_manager_mutex,
241
                               OPJ_BOOL check_pterm);
242
243
244
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
245
                                        OPJ_UINT32 w,
246
                                        OPJ_UINT32 h);
247
248
/*@}*/
249
250
/*@}*/
251
252
/* ----------------------------------------------------------------------- */
253
254
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
255
4.69G
{
256
4.69G
    return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
257
4.69G
}
258
259
static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
260
        OPJ_UINT32 pfX,
261
        OPJ_UINT32 nfX,
262
        OPJ_UINT32 ci)
263
987M
{
264
    /*
265
      0 pfX T1_CHI_THIS           T1_LUT_SGN_W
266
      1 tfX T1_SIGMA_1            T1_LUT_SIG_N
267
      2 nfX T1_CHI_THIS           T1_LUT_SGN_E
268
      3 tfX T1_SIGMA_3            T1_LUT_SIG_W
269
      4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
270
      5 tfX T1_SIGMA_5            T1_LUT_SIG_E
271
      6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
272
      7 tfX T1_SIGMA_7            T1_LUT_SIG_S
273
    */
274
275
987M
    OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
276
987M
                                         T1_SIGMA_7);
277
278
987M
    lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
279
987M
    lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
280
987M
    if (ci == 0U) {
281
249M
        lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
282
737M
    } else {
283
737M
        lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
284
737M
    }
285
987M
    lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
286
987M
    return lu;
287
987M
}
288
289
static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
290
987M
{
291
987M
    return lut_ctxno_sc[lu];
292
987M
}
293
294
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
295
5.31G
{
296
5.31G
    OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
297
5.31G
    OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
298
5.31G
    return tmp2;
299
5.31G
}
300
301
static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
302
987M
{
303
987M
    return lut_spb[lu];
304
987M
}
305
306
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
307
532M
{
308
532M
    if (bitpos > 0) {
309
483M
        return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
310
483M
    }
311
312
49.0M
    return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
313
532M
}
314
315
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
316
3.88G
{
317
3.88G
    if (bitpos > 0) {
318
3.40G
        return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
319
3.40G
    }
320
321
483M
    return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
322
3.88G
}
323
324
991M
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
325
991M
{ \
326
991M
    /* east */ \
327
991M
    flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
328
991M
 \
329
991M
    /* mark target as significant */ \
330
991M
    flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
331
991M
 \
332
991M
    /* west */ \
333
991M
    flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
334
991M
 \
335
991M
    /* north-west, north, north-east */ \
336
991M
    if (ci == 0U && !(vsc)) { \
337
191M
        opj_flag_t* north = flagsp - (stride); \
338
191M
        *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
339
191M
        north[-1] |= T1_SIGMA_17; \
340
191M
        north[1] |= T1_SIGMA_15; \
341
191M
    } \
342
991M
 \
343
991M
    /* south-west, south, south-east */ \
344
991M
    if (ci == 3U) { \
345
247M
        opj_flag_t* south = flagsp + (stride); \
346
247M
        *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
347
247M
        south[-1] |= T1_SIGMA_2; \
348
247M
        south[1] |= T1_SIGMA_0; \
349
247M
    } \
350
991M
}
351
352
353
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
354
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
355
                                       OPJ_UINT32 vsc)
356
536M
{
357
536M
    opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
358
536M
}
359
360
/**
361
Encode significance propagation pass step
362
*/
363
7.09G
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
364
7.09G
{ \
365
7.09G
    OPJ_UINT32 v; \
366
7.09G
    const OPJ_UINT32 ci = (ciIn); \
367
7.09G
    const OPJ_UINT32 vsc = (vscIn); \
368
7.09G
    const OPJ_INT32* l_datap = (datapIn); \
369
7.09G
    opj_flag_t* flagsp = (flagspIn); \
370
7.09G
    OPJ_UINT32 const flags = *flagsp; \
371
7.09G
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
372
7.09G
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
373
2.26G
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
374
2.26G
        v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
375
2.26G
/* #ifdef DEBUG_ENC_SIG */ \
376
2.26G
/*        fprintf(stderr, "   ctxt1=%d\n", ctxt1); */ \
377
2.26G
/* #endif */ \
378
2.26G
        opj_t1_setcurctx(curctx, ctxt1); \
379
2.26G
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
380
0
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
381
2.26G
        } else { \
382
2.26G
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
383
2.26G
        } \
384
2.26G
        if (v) { \
385
418M
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
386
418M
                                *flagsp, \
387
418M
                                flagsp[-1], flagsp[1], \
388
418M
                                ci); \
389
418M
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
390
418M
            v = opj_smr_sign(*l_datap); \
391
418M
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
392
418M
                                              (OPJ_UINT32)bpno); \
393
418M
/* #ifdef DEBUG_ENC_SIG */ \
394
418M
/*            fprintf(stderr, "   ctxt2=%d\n", ctxt2); */ \
395
418M
/* #endif */ \
396
418M
            opj_t1_setcurctx(curctx, ctxt2); \
397
418M
            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
398
0
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
399
418M
            } else { \
400
418M
                OPJ_UINT32 spb = opj_t1_getspb(lu); \
401
418M
/* #ifdef DEBUG_ENC_SIG */ \
402
418M
/*                fprintf(stderr, "   spb=%d\n", spb); */ \
403
418M
/* #endif */ \
404
418M
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
405
418M
            } \
406
418M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
407
418M
        } \
408
2.26G
        *flagsp |= T1_PI_THIS << (ci * 3U); \
409
2.26G
    } \
410
7.09G
}
411
412
static INLINE void opj_t1_dec_sigpass_step_raw(
413
    opj_t1_t *t1,
414
    opj_flag_t *flagsp,
415
    OPJ_INT32 *datap,
416
    OPJ_INT32 oneplushalf,
417
    OPJ_UINT32 vsc,
418
    OPJ_UINT32 ci)
419
48.6M
{
420
48.6M
    OPJ_UINT32 v;
421
48.6M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
422
423
48.6M
    OPJ_UINT32 const flags = *flagsp;
424
425
48.6M
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
426
4.83M
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
427
4.05M
        if (opj_mqc_raw_decode(mqc)) {
428
3.67M
            v = opj_mqc_raw_decode(mqc);
429
3.67M
            *datap = v ? -oneplushalf : oneplushalf;
430
3.67M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
431
3.67M
        }
432
4.05M
        *flagsp |= T1_PI_THIS << (ci * 3U);
433
4.05M
    }
434
48.6M
}
435
436
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
437
                                          data_stride, ci, mqc, curctx, \
438
1.98G
                                          v, a, c, ct, oneplushalf, vsc) \
439
1.98G
{ \
440
1.98G
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
441
1.98G
        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
442
459M
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
443
459M
        opj_t1_setcurctx(curctx, ctxt1); \
444
459M
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
445
459M
        if (v) { \
446
212M
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
447
212M
                                flags, \
448
212M
                                flagsp[-1], flagsp[1], \
449
212M
                                ci); \
450
212M
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
451
212M
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
452
212M
            opj_t1_setcurctx(curctx, ctxt2); \
453
212M
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
454
212M
            v = v ^ spb; \
455
212M
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
456
212M
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
457
212M
        } \
458
459M
        flags |= T1_PI_THIS << (ci * 3U); \
459
459M
    } \
460
1.98G
}
461
462
static INLINE void opj_t1_dec_sigpass_step_mqc(
463
    opj_t1_t *t1,
464
    opj_flag_t *flagsp,
465
    OPJ_INT32 *datap,
466
    OPJ_INT32 oneplushalf,
467
    OPJ_UINT32 ci,
468
    OPJ_UINT32 flags_stride,
469
    OPJ_UINT32 vsc)
470
22.2M
{
471
22.2M
    OPJ_UINT32 v;
472
473
22.2M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
474
22.2M
    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
475
22.2M
                                      0, ci, mqc, mqc->curctx,
476
22.2M
                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
477
22.2M
}
478
479
static void opj_t1_enc_sigpass(opj_t1_t *t1,
480
                               OPJ_INT32 bpno,
481
                               OPJ_INT32 *nmsedec,
482
                               OPJ_BYTE type,
483
                               OPJ_UINT32 cblksty
484
                              )
485
8.12M
{
486
8.12M
    OPJ_UINT32 i, k;
487
8.12M
    OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
488
8.12M
    opj_flag_t* f = &T1_FLAGS(0, 0);
489
8.12M
    OPJ_UINT32 const extra = 2;
490
8.12M
    opj_mqc_t* mqc = &(t1->mqc);
491
8.12M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
492
8.12M
    const OPJ_INT32* datap = t1->data;
493
494
8.12M
    *nmsedec = 0;
495
#ifdef DEBUG_ENC_SIG
496
    fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
497
#endif
498
89.3M
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
499
81.2M
        const OPJ_UINT32 w = t1->w;
500
#ifdef DEBUG_ENC_SIG
501
        fprintf(stderr, " k=%d\n", k);
502
#endif
503
3.90G
        for (i = 0; i < w; ++i, ++f, datap += 4) {
504
#ifdef DEBUG_ENC_SIG
505
            fprintf(stderr, " i=%d\n", i);
506
#endif
507
3.82G
            if (*f == 0U) {
508
                /* Nothing to do for any of the 4 data points */
509
2.06G
                continue;
510
2.06G
            }
511
1.76G
            opj_t1_enc_sigpass_step_macro(
512
1.76G
                mqc, curctx, a, c, ct,
513
1.76G
                f,
514
1.76G
                &datap[0],
515
1.76G
                bpno,
516
1.76G
                one,
517
1.76G
                nmsedec,
518
1.76G
                type,
519
1.76G
                0, cblksty & J2K_CCP_CBLKSTY_VSC);
520
1.76G
            opj_t1_enc_sigpass_step_macro(
521
1.76G
                mqc, curctx, a, c, ct,
522
1.76G
                f,
523
1.76G
                &datap[1],
524
1.76G
                bpno,
525
1.76G
                one,
526
1.76G
                nmsedec,
527
1.76G
                type,
528
1.76G
                1, 0);
529
1.76G
            opj_t1_enc_sigpass_step_macro(
530
1.76G
                mqc, curctx, a, c, ct,
531
1.76G
                f,
532
1.76G
                &datap[2],
533
1.76G
                bpno,
534
1.76G
                one,
535
1.76G
                nmsedec,
536
1.76G
                type,
537
1.76G
                2, 0);
538
1.76G
            opj_t1_enc_sigpass_step_macro(
539
1.76G
                mqc, curctx, a, c, ct,
540
1.76G
                f,
541
1.76G
                &datap[3],
542
1.76G
                bpno,
543
1.76G
                one,
544
1.76G
                nmsedec,
545
1.76G
                type,
546
1.76G
                3, 0);
547
1.76G
        }
548
81.2M
    }
549
550
8.12M
    if (k < t1->h) {
551
840k
        OPJ_UINT32 j;
552
#ifdef DEBUG_ENC_SIG
553
        fprintf(stderr, " k=%d\n", k);
554
#endif
555
28.6M
        for (i = 0; i < t1->w; ++i, ++f) {
556
#ifdef DEBUG_ENC_SIG
557
            fprintf(stderr, " i=%d\n", i);
558
#endif
559
27.8M
            if (*f == 0U) {
560
                /* Nothing to do for any of the 4 data points */
561
0
                datap += (t1->h - k);
562
0
                continue;
563
0
            }
564
84.3M
            for (j = k; j < t1->h; ++j, ++datap) {
565
56.4M
                opj_t1_enc_sigpass_step_macro(
566
56.4M
                    mqc, curctx, a, c, ct,
567
56.4M
                    f,
568
56.4M
                    &datap[0],
569
56.4M
                    bpno,
570
56.4M
                    one,
571
56.4M
                    nmsedec,
572
56.4M
                    type,
573
56.4M
                    j - k,
574
56.4M
                    (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
575
56.4M
            }
576
27.8M
        }
577
840k
    }
578
579
8.12M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
580
8.12M
}
581
582
static void opj_t1_dec_sigpass_raw(
583
    opj_t1_t *t1,
584
    OPJ_INT32 bpno,
585
    OPJ_INT32 cblksty)
586
34.1k
{
587
34.1k
    OPJ_INT32 one, half, oneplushalf;
588
34.1k
    OPJ_UINT32 i, j, k;
589
34.1k
    OPJ_INT32 *data = t1->data;
590
34.1k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
591
34.1k
    const OPJ_UINT32 l_w = t1->w;
592
34.1k
    one = 1 << bpno;
593
34.1k
    half = one >> 1;
594
34.1k
    oneplushalf = one | half;
595
596
371k
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
597
17.0M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
598
16.6M
            opj_flag_t flags = *flagsp;
599
16.6M
            if (flags != 0) {
600
11.2M
                opj_t1_dec_sigpass_step_raw(
601
11.2M
                    t1,
602
11.2M
                    flagsp,
603
11.2M
                    data,
604
11.2M
                    oneplushalf,
605
11.2M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
606
11.2M
                    0U);
607
11.2M
                opj_t1_dec_sigpass_step_raw(
608
11.2M
                    t1,
609
11.2M
                    flagsp,
610
11.2M
                    data + l_w,
611
11.2M
                    oneplushalf,
612
11.2M
                    OPJ_FALSE, /* vsc */
613
11.2M
                    1U);
614
11.2M
                opj_t1_dec_sigpass_step_raw(
615
11.2M
                    t1,
616
11.2M
                    flagsp,
617
11.2M
                    data + 2 * l_w,
618
11.2M
                    oneplushalf,
619
11.2M
                    OPJ_FALSE, /* vsc */
620
11.2M
                    2U);
621
11.2M
                opj_t1_dec_sigpass_step_raw(
622
11.2M
                    t1,
623
11.2M
                    flagsp,
624
11.2M
                    data + 3 * l_w,
625
11.2M
                    oneplushalf,
626
11.2M
                    OPJ_FALSE, /* vsc */
627
11.2M
                    3U);
628
11.2M
            }
629
16.6M
        }
630
337k
    }
631
34.1k
    if (k < t1->h) {
632
1.96M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
633
5.68M
            for (j = 0; j < t1->h - k; ++j) {
634
3.73M
                opj_t1_dec_sigpass_step_raw(
635
3.73M
                    t1,
636
3.73M
                    flagsp,
637
3.73M
                    data + j * l_w,
638
3.73M
                    oneplushalf,
639
3.73M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
640
3.73M
                    j);
641
3.73M
            }
642
1.94M
        }
643
17.9k
    }
644
34.1k
}
645
646
1.81M
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
647
1.81M
{ \
648
1.81M
        OPJ_INT32 one, half, oneplushalf; \
649
1.81M
        OPJ_UINT32 i, j, k; \
650
1.81M
        register OPJ_INT32 *data = t1->data; \
651
1.81M
        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
652
1.81M
        const OPJ_UINT32 l_w = w; \
653
1.81M
        opj_mqc_t* mqc = &(t1->mqc); \
654
1.81M
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
655
1.81M
        register OPJ_UINT32 v; \
656
1.81M
        one = 1 << bpno; \
657
1.81M
        half = one >> 1; \
658
1.81M
        oneplushalf = one | half; \
659
22.2M
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
660
806M
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
661
785M
                        opj_flag_t flags = *flagsp; \
662
785M
                        if( flags != 0 ) { \
663
490M
                            opj_t1_dec_sigpass_step_mqc_macro( \
664
490M
                                flags, flagsp, flags_stride, data, \
665
490M
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
666
490M
                            opj_t1_dec_sigpass_step_mqc_macro( \
667
490M
                                flags, flagsp, flags_stride, data, \
668
490M
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
669
490M
                            opj_t1_dec_sigpass_step_mqc_macro( \
670
490M
                                flags, flagsp, flags_stride, data, \
671
490M
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
672
490M
                            opj_t1_dec_sigpass_step_mqc_macro( \
673
490M
                                flags, flagsp, flags_stride, data, \
674
490M
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
675
490M
                            *flagsp = flags; \
676
490M
                        } \
677
785M
                } \
678
20.4M
        } \
679
1.81M
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
680
1.81M
        if( k < h ) { \
681
14.7M
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
682
36.4M
                for (j = 0; j < h - k; ++j) { \
683
22.2M
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
684
22.2M
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
685
22.2M
                } \
686
14.1M
            } \
687
647k
        } \
688
1.81M
}
689
690
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
691
    opj_t1_t *t1,
692
    OPJ_INT32 bpno)
693
314k
{
694
314k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
695
314k
}
696
697
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
698
    opj_t1_t *t1,
699
    OPJ_INT32 bpno)
700
339k
{
701
339k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
702
339k
}
703
704
static void opj_t1_dec_sigpass_mqc_generic_novsc(
705
    opj_t1_t *t1,
706
    OPJ_INT32 bpno)
707
589k
{
708
589k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
709
589k
                                    t1->w + 2U);
710
589k
}
711
712
static void opj_t1_dec_sigpass_mqc_generic_vsc(
713
    opj_t1_t *t1,
714
    OPJ_INT32 bpno)
715
575k
{
716
575k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
717
575k
                                    t1->w + 2U);
718
575k
}
719
720
static void opj_t1_dec_sigpass_mqc(
721
    opj_t1_t *t1,
722
    OPJ_INT32 bpno,
723
    OPJ_INT32 cblksty)
724
1.81M
{
725
1.81M
    if (t1->w == 64 && t1->h == 64) {
726
653k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
727
339k
            opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
728
339k
        } else {
729
314k
            opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
730
314k
        }
731
1.16M
    } else {
732
1.16M
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
733
575k
            opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
734
589k
        } else {
735
589k
            opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
736
589k
        }
737
1.16M
    }
738
1.81M
}
739
740
/**
741
Encode refinement pass step
742
*/
743
5.56G
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
744
5.56G
{\
745
5.56G
    OPJ_UINT32 v; \
746
5.56G
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
747
3.88G
        const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
748
3.88G
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
749
3.88G
        OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
750
3.88G
        *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
751
3.88G
                                          (OPJ_UINT32)bpno); \
752
3.88G
        v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
753
3.88G
/* #ifdef DEBUG_ENC_REF */ \
754
3.88G
/*        fprintf(stderr, "  ctxt=%d\n", ctxt); */ \
755
3.88G
/* #endif */ \
756
3.88G
        opj_t1_setcurctx(curctx, ctxt); \
757
3.88G
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
758
0
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
759
3.88G
        } else { \
760
3.88G
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
761
3.88G
        } \
762
3.88G
        flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
763
3.88G
    } \
764
5.56G
}
765
766
767
static INLINE void opj_t1_dec_refpass_step_raw(
768
    opj_t1_t *t1,
769
    opj_flag_t *flagsp,
770
    OPJ_INT32 *datap,
771
    OPJ_INT32 poshalf,
772
    OPJ_UINT32 ci)
773
47.5M
{
774
47.5M
    OPJ_UINT32 v;
775
776
47.5M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
777
778
47.5M
    if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
779
47.5M
            (T1_SIGMA_THIS << (ci * 3U))) {
780
42.5M
        v = opj_mqc_raw_decode(mqc);
781
42.5M
        *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
782
42.5M
        *flagsp |= T1_MU_THIS << (ci * 3U);
783
42.5M
    }
784
47.5M
}
785
786
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
787
1.86G
                                          mqc, curctx, v, a, c, ct, poshalf) \
788
1.86G
{ \
789
1.86G
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
790
1.86G
            (T1_SIGMA_THIS << (ci * 3U))) { \
791
1.42G
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
792
1.42G
        opj_t1_setcurctx(curctx, ctxt); \
793
1.42G
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
794
1.42G
        data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
795
1.42G
        flags |= T1_MU_THIS << (ci * 3U); \
796
1.42G
    } \
797
1.86G
}
798
799
/*
 * Function form of opj_t1_dec_refpass_step_mqc_macro for a single sample.
 *
 * The macro is expanded with a data stride of 0, so the coefficient that is
 * refined is always *datap whatever the value of ci; ci only selects the
 * flag bits inside *flagsp. The MQ decoder state is accessed directly
 * through the fields of t1->mqc (curctx, a, c, ct).
 */
static INLINE void opj_t1_dec_refpass_step_mqc(
800
    opj_t1_t *t1,
801
    opj_flag_t *flagsp,
802
    OPJ_INT32 *datap,
803
    OPJ_INT32 poshalf,
804
    OPJ_UINT32 ci)
805
21.0M
{
806
21.0M
    OPJ_UINT32 v;
807
808
21.0M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
809
21.0M
    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
810
21.0M
                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
811
21.0M
                                      mqc->ct, poshalf);
812
21.0M
}
813
814
/*
 * Encode refinement pass over the whole code-block held in t1.
 *
 * t1      : coder state (data, flags, w, h and the MQ coder).
 * bpno    : bit-plane number; the tested bit is 1 << (bpno + T1_NMSEDEC_FRACBITS).
 * nmsedec : output; reset to 0 here and handed to the step macro
 *           (presumably accumulates the distortion estimate - confirm
 *           against opj_t1_enc_refpass_step_macro).
 * type    : forwarded to the step macro, which selects raw bypass coding
 *           when it equals T1_TYPE_RAW and MQ coding otherwise.
 *
 * Rows are handled in stripes of 4: each flag word covers one column of a
 * stripe, and datap is consumed 4 samples per column. A final partial
 * stripe (h not a multiple of 4) is handled separately.
 */
static void opj_t1_enc_refpass(
815
    opj_t1_t *t1,
816
    OPJ_INT32 bpno,
817
    OPJ_INT32 *nmsedec,
818
    OPJ_BYTE type)
819
8.12M
{
820
8.12M
    OPJ_UINT32 i, k;
821
8.12M
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
822
8.12M
    opj_flag_t* f = &T1_FLAGS(0, 0);
823
8.12M
    /* number of flag entries skipped at the end of every flag row */
    const OPJ_UINT32 extra = 2U;
824
8.12M
    opj_mqc_t* mqc = &(t1->mqc);
825
8.12M
    /* work on local copies of the MQ coder variables; written back by */
    /* UPLOAD_MQC_VARIABLES at the end */
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
826
8.12M
    const OPJ_INT32* datap = t1->data;
827
828
8.12M
    *nmsedec = 0;
829
#ifdef DEBUG_ENC_REF
830
    fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
831
#endif
832
89.3M
    /* full stripes of 4 rows */
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
833
#ifdef DEBUG_ENC_REF
834
        fprintf(stderr, " k=%d\n", k);
835
#endif
836
3.90G
        for (i = 0; i < t1->w; ++i, f++, datap += 4) {
837
3.82G
            const OPJ_UINT32 flags = *f;
838
3.82G
            /* updated copy; stored back into *f after the 4 steps */
            OPJ_UINT32 flagsUpdated = flags;
839
#ifdef DEBUG_ENC_REF
840
            fprintf(stderr, " i=%d\n", i);
841
#endif
842
3.82G
            if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
843
                /* none significant */
844
2.39G
                continue;
845
2.39G
            }
846
1.43G
            if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
847
1.43G
                    (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
848
                /* all processed by sigpass */
849
50.3M
                continue;
850
50.3M
            }
851
852
1.38G
            /* one step per sample of the column (ci = 0..3) */
            opj_t1_enc_refpass_step_macro(
853
1.38G
                mqc, curctx, a, c, ct,
854
1.38G
                flags, flagsUpdated,
855
1.38G
                &datap[0],
856
1.38G
                bpno,
857
1.38G
                one,
858
1.38G
                nmsedec,
859
1.38G
                type,
860
1.38G
                0);
861
1.38G
            opj_t1_enc_refpass_step_macro(
862
1.38G
                mqc, curctx, a, c, ct,
863
1.38G
                flags, flagsUpdated,
864
1.38G
                &datap[1],
865
1.38G
                bpno,
866
1.38G
                one,
867
1.38G
                nmsedec,
868
1.38G
                type,
869
1.38G
                1);
870
1.38G
            opj_t1_enc_refpass_step_macro(
871
1.38G
                mqc, curctx, a, c, ct,
872
1.38G
                flags, flagsUpdated,
873
1.38G
                &datap[2],
874
1.38G
                bpno,
875
1.38G
                one,
876
1.38G
                nmsedec,
877
1.38G
                type,
878
1.38G
                2);
879
1.38G
            opj_t1_enc_refpass_step_macro(
880
1.38G
                mqc, curctx, a, c, ct,
881
1.38G
                flags, flagsUpdated,
882
1.38G
                &datap[3],
883
1.38G
                bpno,
884
1.38G
                one,
885
1.38G
                nmsedec,
886
1.38G
                type,
887
1.38G
                3);
888
1.38G
            *f = flagsUpdated;
889
1.38G
        }
890
81.2M
    }
891
892
8.12M
    /* last, partial stripe: fewer than 4 rows remain */
    if (k < t1->h) {
893
840k
        OPJ_UINT32 j;
894
840k
        const OPJ_UINT32 remaining_lines = t1->h - k;
895
#ifdef DEBUG_ENC_REF
896
        fprintf(stderr, " k=%d\n", k);
897
#endif
898
28.6M
        for (i = 0; i < t1->w; ++i, ++f) {
899
#ifdef DEBUG_ENC_REF
900
            fprintf(stderr, " i=%d\n", i);
901
#endif
902
27.8M
            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
903
                /* none significant */
904
17.7M
                /* still step over the samples of this column */
                datap += remaining_lines;
905
17.7M
                continue;
906
17.7M
            }
907
30.7M
            for (j = 0; j < remaining_lines; ++j, datap ++) {
908
20.6M
                /* *f is passed as both the input and the updated flag word, */
                /* so the flag is modified in place */
                opj_t1_enc_refpass_step_macro(
909
20.6M
                    mqc, curctx, a, c, ct,
910
20.6M
                    *f, *f,
911
20.6M
                    &datap[0],
912
20.6M
                    bpno,
913
20.6M
                    one,
914
20.6M
                    nmsedec,
915
20.6M
                    type,
916
20.6M
                    j);
917
20.6M
            }
918
10.0M
        }
919
840k
    }
920
921
8.12M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
922
8.12M
}
923
924
925
/*
 * Decode refinement pass, raw (bypass) variant, over the whole code-block.
 *
 * t1   : coder state (data, flags, w, h and the MQ coder).
 * bpno : bit-plane number; poshalf is (1 << bpno) >> 1.
 *
 * Rows are handled in stripes of 4. Inside a stripe, the 4 samples of a
 * column are data[0], data[l_w], data[2*l_w] and data[3*l_w], i.e. the
 * data buffer is addressed row by row with a row length of l_w.
 */
static void opj_t1_dec_refpass_raw(
926
    opj_t1_t *t1,
927
    OPJ_INT32 bpno)
928
32.9k
{
929
32.9k
    OPJ_INT32 one, poshalf;
930
32.9k
    OPJ_UINT32 i, j, k;
931
32.9k
    OPJ_INT32 *data = t1->data;
932
32.9k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
933
32.9k
    const OPJ_UINT32 l_w = t1->w;
934
32.9k
    one = 1 << bpno;
935
32.9k
    poshalf = one >> 1;
936
362k
    /* full stripes: after a stripe, skip 2 flag entries and the 3 data rows */
    /* that the column loop did not step over */
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
937
16.6M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
938
16.3M
            opj_flag_t flags = *flagsp;
939
16.3M
            /* a zero flag word means no sample of the column qualifies */
            if (flags != 0) {
940
10.9M
                opj_t1_dec_refpass_step_raw(
941
10.9M
                    t1,
942
10.9M
                    flagsp,
943
10.9M
                    data,
944
10.9M
                    poshalf,
945
10.9M
                    0U);
946
10.9M
                opj_t1_dec_refpass_step_raw(
947
10.9M
                    t1,
948
10.9M
                    flagsp,
949
10.9M
                    data + l_w,
950
10.9M
                    poshalf,
951
10.9M
                    1U);
952
10.9M
                opj_t1_dec_refpass_step_raw(
953
10.9M
                    t1,
954
10.9M
                    flagsp,
955
10.9M
                    data + 2 * l_w,
956
10.9M
                    poshalf,
957
10.9M
                    2U);
958
10.9M
                opj_t1_dec_refpass_step_raw(
959
10.9M
                    t1,
960
10.9M
                    flagsp,
961
10.9M
                    data + 3 * l_w,
962
10.9M
                    poshalf,
963
10.9M
                    3U);
964
10.9M
            }
965
16.3M
        }
966
329k
    }
967
32.9k
    /* last, partial stripe: t1->h - k rows remain */
    if (k < t1->h) {
968
1.88M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
969
5.46M
            for (j = 0; j < t1->h - k; ++j) {
970
3.59M
                opj_t1_dec_refpass_step_raw(
971
3.59M
                    t1,
972
3.59M
                    flagsp,
973
3.59M
                    data + j * l_w,
974
3.59M
                    poshalf,
975
3.59M
                    j);
976
3.59M
            }
977
1.87M
        }
978
17.1k
    }
979
32.9k
}
980
981
1.72M
/*
 * Body shared by the MQ-coder decode refinement pass specialisations.
 *
 * t1           : coder state (data, flags and the MQ coder).
 * bpno         : bit-plane number; poshalf is (1 << bpno) >> 1.
 * w, h         : code-block width and height (constants or expressions).
 * flags_stride : length of one flag row; the first flag word used is
 *                t1->flags[flags_stride + 1].
 *
 * Full stripes of 4 rows work on a local copy of each flag word and on
 * local copies of the MQ decoder variables (DOWNLOAD_MQC_VARIABLES). The
 * local MQ variables are written back (UPLOAD_MQC_VARIABLES) before the
 * last, partial stripe, because that part calls
 * opj_t1_dec_refpass_step_mqc(), which operates on t1->mqc directly.
 *
 * NOTE(review): w, h and flags_stride are expanded without surrounding
 * parentheses; the callers visible in this file pass 64, 66, t1->w, t1->h
 * and t1->w + 2U.
 */
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
982
1.72M
{ \
983
1.72M
        OPJ_INT32 one, poshalf; \
984
1.72M
        OPJ_UINT32 i, j, k; \
985
1.72M
        register OPJ_INT32 *data = t1->data; \
986
1.72M
        register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
987
1.72M
        const OPJ_UINT32 l_w = w; \
988
1.72M
        opj_mqc_t* mqc = &(t1->mqc); \
989
1.72M
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
990
1.72M
        register OPJ_UINT32 v; \
991
1.72M
        one = 1 << bpno; \
992
1.72M
        poshalf = one >> 1; \
993
20.8M
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
994
749M
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
995
730M
                        opj_flag_t flags = *flagsp; \
996
730M
                        if( flags != 0 ) { \
997
462M
                            opj_t1_dec_refpass_step_mqc_macro( \
998
462M
                                flags, data, l_w, 0, \
999
462M
                                mqc, curctx, v, a, c, ct, poshalf); \
1000
462M
                            opj_t1_dec_refpass_step_mqc_macro( \
1001
462M
                                flags, data, l_w, 1, \
1002
462M
                                mqc, curctx, v, a, c, ct, poshalf); \
1003
462M
                            opj_t1_dec_refpass_step_mqc_macro( \
1004
462M
                                flags, data, l_w, 2, \
1005
462M
                                mqc, curctx, v, a, c, ct, poshalf); \
1006
462M
                            opj_t1_dec_refpass_step_mqc_macro( \
1007
462M
                                flags, data, l_w, 3, \
1008
462M
                                mqc, curctx, v, a, c, ct, poshalf); \
1009
462M
                            *flagsp = flags; \
1010
462M
                        } \
1011
730M
                } \
1012
19.1M
        } \
1013
1.72M
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1014
1.72M
        if( k < h ) { \
1015
13.9M
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1016
34.3M
                for (j = 0; j < h - k; ++j) { \
1017
21.0M
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1018
21.0M
                } \
1019
13.3M
            } \
1020
623k
        } \
1021
1.72M
}
1022
1023
/*
 * MQ-coder decode refinement pass specialised for 64x64 code-blocks:
 * expands opj_t1_dec_refpass_mqc_internal with constant width 64, height 64
 * and flag row length 66.
 */
static void opj_t1_dec_refpass_mqc_64x64(
1024
    opj_t1_t *t1,
1025
    OPJ_INT32 bpno)
1026
612k
{
1027
612k
    opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1028
612k
}
1029
1030
/*
 * MQ-coder decode refinement pass for any code-block size: expands
 * opj_t1_dec_refpass_mqc_internal with the run-time dimensions t1->w and
 * t1->h and a flag row length of t1->w + 2.
 */
static void opj_t1_dec_refpass_mqc_generic(
1031
    opj_t1_t *t1,
1032
    OPJ_INT32 bpno)
1033
1.10M
{
1034
1.10M
    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1035
1.10M
}
1036
1037
/*
 * MQ-coder decode refinement pass entry point: dispatches to the 64x64
 * specialisation when the code-block is exactly 64 wide and 64 high, and to
 * the generic version otherwise.
 */
static void opj_t1_dec_refpass_mqc(
1038
    opj_t1_t *t1,
1039
    OPJ_INT32 bpno)
1040
1.72M
{
1041
1.72M
    if (t1->w == 64 && t1->h == 64) {
1042
612k
        opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1043
1.10M
    } else {
1044
1.10M
        opj_t1_dec_refpass_mqc_generic(t1, bpno);
1045
1.10M
    }
1046
1.72M
}
1047
1048
/**
1049
Encode clean-up pass step for one column of a stripe.

mqc, curctx, a, c, ct : MQ coder and its working variables.
flagspIn : pointer to the flag word of the column (modified).
datapIn  : pointer to the first sample to code; a local copy is advanced,
           the caller's pointer is left untouched.
bpno     : bit-plane number, used for the distortion estimate.
one      : mask of the bit tested in each sample magnitude.
nmsedec  : pointer to the distortion accumulator (incremented).
agg      : non-zero when the caller already coded a run for this column;
           the first processed sample (ci == runlen) then goes straight to
           sign coding.
runlen   : index of the first sample to process.
lim      : one past the index of the last sample to process.
cblksty  : code-block style; only J2K_CCP_CBLKSTY_VSC is tested here.

If every sample of the column already has its SIGMA and PI bits set, only
the PI bits of samples runlen..3 are cleared. Otherwise each sample from
runlen to lim-1 is coded when needed and its PI bit is cleared.

The expansion also uses a variable named t1 from the enclosing scope
(t1->w + 2U is passed to opj_t1_update_flags); it is not a parameter.
1050
*/
1051
1.92G
#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
1052
1.92G
{ \
1053
1.92G
    OPJ_UINT32 v; \
1054
1.92G
    OPJ_UINT32 ci; \
1055
1.92G
    opj_flag_t* const flagsp = (flagspIn); \
1056
1.92G
    const OPJ_INT32* l_datap = (datapIn); \
1057
1.92G
    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
1058
1.92G
                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1059
1.92G
 \
1060
1.92G
    if ((*flagsp & check) == check) { \
1061
3.72M
        if (runlen == 0) { \
1062
3.72M
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1063
3.72M
        } else if (runlen == 1) { \
1064
0
            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
1065
0
        } else if (runlen == 2) { \
1066
0
            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
1067
0
        } else if (runlen == 3) { \
1068
0
            *flagsp &= ~(T1_PI_3); \
1069
0
        } \
1070
3.72M
    } \
1071
1.92G
    else \
1072
9.51G
    for (ci = runlen; ci < lim; ++ci) { \
1073
7.58G
        OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
1074
7.58G
        if ((agg != 0) && (ci == runlen)) { \
1075
26.0M
            goto_PARTIAL = OPJ_TRUE; \
1076
26.0M
        } \
1077
7.58G
        else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
1078
1.41G
            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
1079
1.41G
/* #ifdef DEBUG_ENC_CLN */ \
1080
1.41G
/*            printf("   ctxt1=%d\n", ctxt1); */ \
1081
1.41G
/* #endif */ \
1082
1.41G
            opj_t1_setcurctx(curctx, ctxt1); \
1083
1.41G
            v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
1084
1.41G
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
1085
1.41G
            if (v) { \
1086
87.8M
                goto_PARTIAL = OPJ_TRUE; \
1087
87.8M
            } \
1088
1.41G
        } \
1089
7.58G
        if( goto_PARTIAL ) { \
1090
113M
            OPJ_UINT32 vsc; \
1091
113M
            OPJ_UINT32 ctxt2, spb; \
1092
113M
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1093
113M
                        *flagsp, \
1094
113M
                        flagsp[-1], flagsp[1], \
1095
113M
                        ci); \
1096
113M
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
1097
113M
                                                (OPJ_UINT32)bpno); \
1098
113M
            ctxt2 = opj_t1_getctxno_sc(lu); \
1099
113M
/* #ifdef DEBUG_ENC_CLN */ \
1100
113M
/*           printf("   ctxt2=%d\n", ctxt2); */ \
1101
113M
/* #endif */ \
1102
113M
            opj_t1_setcurctx(curctx, ctxt2); \
1103
113M
 \
1104
113M
            v = opj_smr_sign(*l_datap); \
1105
113M
            spb = opj_t1_getspb(lu); \
1106
113M
/* #ifdef DEBUG_ENC_CLN */ \
1107
113M
/*           printf("   spb=%d\n", spb); */\
1108
113M
/* #endif */ \
1109
113M
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
1110
113M
            vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
1111
113M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
1112
113M
        } \
1113
7.58G
        *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
1114
7.58G
        l_datap ++; \
1115
7.58G
    } \
1116
1.92G
}
1117
1118
/*
 * Decode clean-up pass step for one sample.
 *
 * check_flags  : when true, the sample is skipped if its T1_SIGMA_THIS or
 *                T1_PI_THIS bit is set; when false it is always processed.
 * partial      : when true, the significance bit is not decoded and the
 *                sample goes straight to sign decoding.
 * flags        : lvalue holding the flag word of the column.
 * flagsp       : pointer to that flag word; flagsp[-1] and flagsp[1] are
 *                read for the sign context.
 * flags_stride : forwarded to opj_t1_update_flags_macro.
 * data, data_stride, ci : the sample written is data[ci*data_stride].
 * mqc, curctx, v, a, c, ct : MQ decoder, its working variables and the
 *                lvalue receiving decoded bits.
 * oneplushalf  : magnitude stored for a newly significant sample; the value
 *                written is -oneplushalf when the decoded sign (after XOR
 *                with opj_t1_getspb) is 1, +oneplushalf otherwise.
 * vsc          : forwarded to opj_t1_update_flags_macro.
 *
 * The do { } while(0) exists so that "break" can leave early when the
 * decoded significance bit is 0.
 */
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1119
                                      flags, flagsp, flags_stride, data, \
1120
                                      data_stride, ci, mqc, curctx, \
1121
2.39G
                                      v, a, c, ct, oneplushalf, vsc) \
1122
2.39G
{ \
1123
2.39G
    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1124
560M
        do { \
1125
560M
            if( !partial ) { \
1126
552M
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1127
552M
                opj_t1_setcurctx(curctx, ctxt1); \
1128
552M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1129
552M
                if( !v ) \
1130
552M
                    break; \
1131
552M
            } \
1132
560M
            { \
1133
242M
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1134
242M
                                    flags, flagsp[-1], flagsp[1], \
1135
242M
                                    ci); \
1136
242M
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1137
242M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1138
242M
                v = v ^ opj_t1_getspb(lu); \
1139
242M
                data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1140
242M
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1141
242M
            } \
1142
242M
        } while(0); \
1143
560M
    } \
1144
2.39G
}
1145
1146
/*
 * Function form of opj_t1_dec_clnpass_step_macro for a single sample.
 *
 * The macro is expanded with flag checking enabled, "partial" disabled, a
 * flag row length of t1->w + 2 and a data stride of 0, so the sample
 * written is always *datap; ci only selects the flag bits inside *flagsp.
 * The MQ decoder state is accessed directly through the fields of t1->mqc.
 */
static void opj_t1_dec_clnpass_step(
1147
    opj_t1_t *t1,
1148
    opj_flag_t *flagsp,
1149
    OPJ_INT32 *datap,
1150
    OPJ_INT32 oneplushalf,
1151
    OPJ_UINT32 ci,
1152
    OPJ_UINT32 vsc)
1153
28.8M
{
1154
28.8M
    OPJ_UINT32 v;
1155
1156
28.8M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1157
28.8M
    opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1158
28.8M
                                  *flagsp, flagsp, t1->w + 2U, datap,
1159
28.8M
                                  0, ci, mqc, mqc->curctx,
1160
28.8M
                                  v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1161
28.8M
}
1162
1163
/*
 * Encode clean-up pass over the whole code-block held in t1.
 *
 * t1      : coder state (data, flags, w, h and the MQ coder).
 * bpno    : bit-plane number; the tested bit is 1 << (bpno + T1_NMSEDEC_FRACBITS).
 * nmsedec : output; reset to 0 here and incremented by the step macro.
 * cblksty : code-block style, forwarded to the step macro.
 *
 * Rows are handled in stripes of 4. When the flag word of a column is 0,
 * a run is coded first: the number of leading samples whose tested bit is
 * clear is counted, one decision is coded with context T1_CTXNO_AGG and,
 * if the run stops before 4, its 2-bit length is coded with T1_CTXNO_UNI.
 * The remaining samples of the column go through
 * opj_t1_enc_clnpass_step_macro. A final partial stripe never uses the run
 * mode.
 */
static void opj_t1_enc_clnpass(
1164
    opj_t1_t *t1,
1165
    OPJ_INT32 bpno,
1166
    OPJ_INT32 *nmsedec,
1167
    OPJ_UINT32 cblksty)
1168
9.02M
{
1169
9.02M
    OPJ_UINT32 i, k;
1170
9.02M
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1171
9.02M
    opj_mqc_t* mqc = &(t1->mqc);
1172
9.02M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1173
9.02M
    const OPJ_INT32* datap = t1->data;
1174
9.02M
    opj_flag_t *f = &T1_FLAGS(0, 0);
1175
9.02M
    /* number of flag entries skipped at the end of every flag row */
    const OPJ_UINT32 extra = 2U;
1176
1177
9.02M
    *nmsedec = 0;
1178
#ifdef DEBUG_ENC_CLN
1179
    printf("enc_clnpass: bpno=%d\n", bpno);
1180
#endif
1181
98.8M
    /* full stripes of 4 rows */
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1182
#ifdef DEBUG_ENC_CLN
1183
        printf(" k=%d\n", k);
1184
#endif
1185
4.30G
        for (i = 0; i < t1->w; ++i, f++) {
1186
4.21G
            OPJ_UINT32 agg, runlen;
1187
#ifdef DEBUG_ENC_CLN
1188
            printf("  i=%d\n", i);
1189
#endif
1190
4.21G
            /* run mode only when the whole flag word is 0 */
            agg = !*f;
1191
#ifdef DEBUG_ENC_CLN
1192
            printf("   agg=%d\n", agg);
1193
#endif
1194
4.21G
            if (agg) {
1195
11.6G
                /* count leading samples whose tested bit is clear; datap */
                /* advances with runlen */
                for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1196
9.36G
                    if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1197
26.0M
                        break;
1198
26.0M
                    }
1199
9.36G
                }
1200
2.35G
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1201
2.35G
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1202
2.35G
                if (runlen == 4) {
1203
2.32G
                    /* nothing to code in this column; datap already moved by 4 */
                    continue;
1204
2.32G
                }
1205
26.0M
                /* code the run length as two bits, most significant first */
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1206
26.0M
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1207
26.0M
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1208
1.86G
            } else {
1209
1.86G
                runlen = 0;
1210
1.86G
            }
1211
1.89G
            opj_t1_enc_clnpass_step_macro(
1212
1.89G
                mqc, curctx, a, c, ct,
1213
1.89G
                f,
1214
1.89G
                datap,
1215
1.89G
                bpno,
1216
1.89G
                one,
1217
1.89G
                nmsedec,
1218
1.89G
                agg,
1219
1.89G
                runlen,
1220
1.89G
                4U,
1221
1.89G
                cblksty);
1222
1.89G
            /* the macro works on its own copy of datap: skip the samples it handled */
            datap += 4 - runlen;
1223
1.89G
        }
1224
89.8M
    }
1225
9.02M
    /* last, partial stripe: t1->h - k rows, no run mode */
    if (k < t1->h) {
1226
948k
        const OPJ_UINT32 agg = 0;
1227
948k
        const OPJ_UINT32 runlen = 0;
1228
#ifdef DEBUG_ENC_CLN
1229
        printf(" k=%d\n", k);
1230
#endif
1231
32.1M
        for (i = 0; i < t1->w; ++i, f++) {
1232
#ifdef DEBUG_ENC_CLN
1233
            printf("  i=%d\n", i);
1234
            printf("   agg=%d\n", agg);
1235
#endif
1236
31.1M
            opj_t1_enc_clnpass_step_macro(
1237
31.1M
                mqc, curctx, a, c, ct,
1238
31.1M
                f,
1239
31.1M
                datap,
1240
31.1M
                bpno,
1241
31.1M
                one,
1242
31.1M
                nmsedec,
1243
31.1M
                agg,
1244
31.1M
                runlen,
1245
31.1M
                t1->h - k,
1246
31.1M
                cblksty);
1247
31.1M
            datap += t1->h - k;
1248
31.1M
        }
1249
948k
    }
1250
1251
9.02M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1252
9.02M
}
1253
1254
2.19M
/*
 * Body shared by the decode clean-up pass specialisations.
 *
 * t1           : coder state (data, flags and the MQ coder).
 * bpno         : bit-plane number; oneplushalf is (1 << bpno) | ((1 << bpno) >> 1).
 * vsc          : value forwarded as "vsc" for the first sample of each
 *                column (ci == 0); the other samples of a full stripe
 *                always get OPJ_FALSE.
 * w, h         : code-block width and height (constants or expressions).
 * flags_stride : length of one flag row; the first flag word used is
 *                t1->flags[flags_stride + 1].
 *
 * Full stripes of 4 rows: when the flag word of a column is 0, one decision
 * is decoded with context T1_CTXNO_AGG; if it is 0 the column is skipped,
 * otherwise a 2-bit run length is decoded with T1_CTXNO_UNI and the switch
 * starts at that sample with "partial" set, falling through to the
 * following samples with "partial" cleared. The switch has no default
 * label; runlen is built from two decoded bits. Columns with a non-zero
 * flag word run the 4 steps with flag checking enabled. In both cases the
 * PI bits are cleared when the flag word is stored back.
 *
 * The local MQ variables are written back (UPLOAD_MQC_VARIABLES) before the
 * last, partial stripe, because that part calls opj_t1_dec_clnpass_step(),
 * which operates on t1->mqc directly. There, vsc is passed for every row.
 */
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1255
2.19M
{ \
1256
2.19M
    OPJ_INT32 one, half, oneplushalf; \
1257
2.19M
    OPJ_UINT32 runlen; \
1258
2.19M
    OPJ_UINT32 i, j, k; \
1259
2.19M
    const OPJ_UINT32 l_w = w; \
1260
2.19M
    opj_mqc_t* mqc = &(t1->mqc); \
1261
2.19M
    register OPJ_INT32 *data = t1->data; \
1262
2.19M
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1263
2.19M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1264
2.19M
    register OPJ_UINT32 v; \
1265
2.19M
    one = 1 << bpno; \
1266
2.19M
    half = one >> 1; \
1267
2.19M
    oneplushalf = one | half; \
1268
26.9M
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1269
1.01G
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1270
989M
            opj_flag_t flags = *flagsp; \
1271
989M
            if (flags == 0) { \
1272
402M
                OPJ_UINT32 partial = OPJ_TRUE; \
1273
402M
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1274
402M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1275
402M
                if (!v) { \
1276
393M
                    continue; \
1277
393M
                } \
1278
402M
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1279
8.82M
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1280
8.82M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1281
8.82M
                runlen = (runlen << 1) | v; \
1282
8.82M
                switch(runlen) { \
1283
2.77M
                    case 0: \
1284
2.77M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1285
2.77M
                                            flags, flagsp, flags_stride, data, \
1286
2.77M
                                            l_w, 0, mqc, curctx, \
1287
2.77M
                                            v, a, c, ct, oneplushalf, vsc); \
1288
2.77M
                        partial = OPJ_FALSE; \
1289
2.77M
                        /* FALLTHRU */ \
1290
5.04M
                    case 1: \
1291
5.04M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1292
5.04M
                                            flags, flagsp, flags_stride, data, \
1293
5.04M
                                            l_w, 1, mqc, curctx, \
1294
5.04M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1295
5.04M
                        partial = OPJ_FALSE; \
1296
5.04M
                        /* FALLTHRU */ \
1297
7.09M
                    case 2: \
1298
7.09M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1299
7.09M
                                            flags, flagsp, flags_stride, data, \
1300
7.09M
                                            l_w, 2, mqc, curctx, \
1301
7.09M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1302
7.09M
                        partial = OPJ_FALSE; \
1303
7.09M
                        /* FALLTHRU */ \
1304
8.82M
                    case 3: \
1305
8.82M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1306
8.82M
                                            flags, flagsp, flags_stride, data, \
1307
8.82M
                                            l_w, 3, mqc, curctx, \
1308
8.82M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1309
8.82M
                        break; \
1310
8.82M
                } \
1311
586M
            } else { \
1312
586M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1313
586M
                                    flags, flagsp, flags_stride, data, \
1314
586M
                                    l_w, 0, mqc, curctx, \
1315
586M
                                    v, a, c, ct, oneplushalf, vsc); \
1316
586M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1317
586M
                                    flags, flagsp, flags_stride, data, \
1318
586M
                                    l_w, 1, mqc, curctx, \
1319
586M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1320
586M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1321
586M
                                    flags, flagsp, flags_stride, data, \
1322
586M
                                    l_w, 2, mqc, curctx, \
1323
586M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1324
586M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1325
586M
                                    flags, flagsp, flags_stride, data, \
1326
586M
                                    l_w, 3, mqc, curctx, \
1327
586M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1328
586M
            } \
1329
989M
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1330
595M
        } \
1331
24.7M
    } \
1332
2.19M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1333
2.19M
    if( k < h ) { \
1334
18.3M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1335
46.4M
            for (j = 0; j < h - k; ++j) { \
1336
28.8M
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1337
28.8M
            } \
1338
17.5M
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1339
17.5M
        } \
1340
760k
    } \
1341
2.19M
}
1342
1343
/*
 * When the code-block style has J2K_CCP_CBLKSTY_SEGSYM set, consumes the
 * 4-bit symbol that follows a clean-up pass: four bits are decoded with
 * context T1_CTXNO_UNI and assembled, most significant bit first, into v.
 *
 * The assembled value is currently not checked: the comparison against 0xa
 * and the associated warning are commented out, so the only effect of this
 * function is to advance the MQ decoder state.
 */
static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1344
2.19M
{
1345
2.19M
    if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1346
1.50M
        opj_mqc_t* mqc = &(t1->mqc);
1347
1.50M
        OPJ_UINT32 v, v2;
1348
1.50M
        opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1349
1.50M
        opj_mqc_decode(v, mqc);
1350
1.50M
        opj_mqc_decode(v2, mqc);
1351
1.50M
        v = (v << 1) | v2;
1352
1.50M
        opj_mqc_decode(v2, mqc);
1353
1.50M
        v = (v << 1) | v2;
1354
1.50M
        opj_mqc_decode(v2, mqc);
1355
1.50M
        v = (v << 1) | v2;
1356
        /*
1357
        if (v!=0xa) {
1358
            opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1359
        }
1360
        */
1361
1.50M
    }
1362
2.19M
}
1363
1364
/*
 * Decode clean-up pass specialised for 64x64 code-blocks with vsc set to
 * OPJ_FALSE: expands opj_t1_dec_clnpass_internal with constant width 64,
 * height 64 and flag row length 66.
 */
static void opj_t1_dec_clnpass_64x64_novsc(
1365
    opj_t1_t *t1,
1366
    OPJ_INT32 bpno)
1367
391k
{
1368
391k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1369
391k
}
1370
1371
/*
 * Decode clean-up pass specialised for 64x64 code-blocks with vsc set to
 * OPJ_TRUE: expands opj_t1_dec_clnpass_internal with constant width 64,
 * height 64 and flag row length 66.
 */
static void opj_t1_dec_clnpass_64x64_vsc(
1372
    opj_t1_t *t1,
1373
    OPJ_INT32 bpno)
1374
418k
{
1375
418k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1376
418k
}
1377
1378
/*
 * Decode clean-up pass for any code-block size with vsc set to OPJ_FALSE:
 * expands opj_t1_dec_clnpass_internal with the run-time dimensions t1->w
 * and t1->h and a flag row length of t1->w + 2.
 */
static void opj_t1_dec_clnpass_generic_novsc(
1379
    opj_t1_t *t1,
1380
    OPJ_INT32 bpno)
1381
704k
{
1382
704k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1383
704k
                                t1->w + 2U);
1384
704k
}
1385
1386
/*
 * Decode clean-up pass for any code-block size with vsc set to OPJ_TRUE:
 * expands opj_t1_dec_clnpass_internal with the run-time dimensions t1->w
 * and t1->h and a flag row length of t1->w + 2.
 */
static void opj_t1_dec_clnpass_generic_vsc(
1387
    opj_t1_t *t1,
1388
    OPJ_INT32 bpno)
1389
677k
{
1390
677k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1391
677k
                                t1->w + 2U);
1392
677k
}
1393
1394
/*
 * Decode clean-up pass entry point.
 *
 * Selects one of four specialisations from the code-block size (exactly
 * 64x64 or anything else) and from the J2K_CCP_CBLKSTY_VSC bit of cblksty,
 * then calls opj_t1_dec_clnpass_check_segsym() to handle the segmentation
 * symbol when that style is enabled.
 */
static void opj_t1_dec_clnpass(
1395
    opj_t1_t *t1,
1396
    OPJ_INT32 bpno,
1397
    OPJ_INT32 cblksty)
1398
2.19M
{
1399
2.19M
    if (t1->w == 64 && t1->h == 64) {
1400
810k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1401
418k
            opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1402
418k
        } else {
1403
391k
            opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1404
391k
        }
1405
1.38M
    } else {
1406
1.38M
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1407
677k
            opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1408
704k
        } else {
1409
704k
            opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1410
704k
        }
1411
1.38M
    }
1412
2.19M
    opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1413
2.19M
}
1414
1415
1416
/*
 * Converts a normalised distortion estimate into a weighted one.
 *
 * nmsedec      : normalised estimate to scale.
 * compno       : component index, used to pick a weight from mct_norms.
 * level, orient: passed to opj_dwt_getnorm / opj_dwt_getnorm_real.
 * bpno         : bit-plane number; contributes a factor (1 << bpno).
 * qmfbid       : 1 selects opj_dwt_getnorm(); any other value selects
 *                opj_dwt_getnorm_real() and divides stepsize by
 *                1, 2 or 4 depending on orient (0 -> 1, 3 -> 4, else 2).
 * stepsize     : quantisation step size.
 * numcomps     : unused.
 * mct_norms    : optional per-component weights (may be NULL).
 * mct_numcomps : number of entries in mct_norms; components beyond it keep
 *                a weight of 1.
 *
 * Returns (w1 * w2 * stepsize * 2^bpno)^2 * nmsedec / 8192.
 */
static OPJ_FLOAT64 opj_t1_getwmsedec(
1417
    OPJ_INT32 nmsedec,
1418
    OPJ_UINT32 compno,
1419
    OPJ_UINT32 level,
1420
    OPJ_UINT32 orient,
1421
    OPJ_INT32 bpno,
1422
    OPJ_UINT32 qmfbid,
1423
    OPJ_FLOAT64 stepsize,
1424
    OPJ_UINT32 numcomps,
1425
    const OPJ_FLOAT64 * mct_norms,
1426
    OPJ_UINT32 mct_numcomps)
1427
25.2M
{
1428
25.2M
    OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1429
25.2M
    OPJ_ARG_NOT_USED(numcomps);
1430
1431
25.2M
    if (mct_norms && (compno < mct_numcomps)) {
1432
299k
        w1 = mct_norms[compno];
1433
299k
    }
1434
1435
25.2M
    if (qmfbid == 1) {
1436
11.4M
        w2 = opj_dwt_getnorm(level, orient);
1437
13.8M
    } else {    /* if (qmfbid == 0) */
1438
13.8M
        const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1439
13.8M
                                    (orient == 3) ? 2 : 1;
1440
13.8M
        w2 = opj_dwt_getnorm_real(level, orient);
1441
        /* Not sure this is right. But preserves past behaviour */
1442
13.8M
        stepsize /= (1 << log2_gain);
1443
13.8M
    }
1444
1445
25.2M
    wmsedec = w1 * w2 * stepsize * (1 << bpno);
1446
25.2M
    /* square the weight and scale the estimate: wmsedec^2 * nmsedec / 8192 */
    wmsedec *= wmsedec * nmsedec / 8192.0;
1447
1448
25.2M
    return wmsedec;
1449
25.2M
}
1450
1451
/*
 * Prepares the data and flag buffers of t1 for a code-block of w x h
 * samples and records the dimensions in t1->w / t1->h.
 *
 * Both buffers are only re-allocated when the requested size exceeds the
 * size currently recorded in t1; they are zeroed on every call.
 *
 * The flag buffer has (h + 3) / 4 + 2 rows of w + 2 entries. The first and
 * the last row are filled with all four PI bits set. When h is not a
 * multiple of 4, the row covering the last, partial stripe gets the PI bits
 * of its missing samples set.
 *
 * Returns OPJ_TRUE on success, OPJ_FALSE when an allocation fails.
 */
static OPJ_BOOL opj_t1_allocate_buffers(
1452
    opj_t1_t *t1,
1453
    OPJ_UINT32 w,
1454
    OPJ_UINT32 h)
1455
35.0M
{
1456
35.0M
    OPJ_UINT32 flagssize;
1457
35.0M
    OPJ_UINT32 flags_stride;
1458
1459
    /* No risk of overflow. Prior checks ensure those assert are met */
1460
    /* They are per the specification */
1461
35.0M
    assert(w <= 1024);
1462
35.0M
    assert(h <= 1024);
1463
35.0M
    assert(w * h <= 4096);
1464
1465
    /* data buffer of w * h samples. NOTE(review): this comment used to say */
    /* "encoder uses tile buffer, so no need to allocate", but the block */
    /* below is unconditional in this version - confirm and update */
1466
35.0M
    {
1467
35.0M
        OPJ_UINT32 datasize = w * h;
1468
1469
35.0M
        if (datasize > t1->datasize) {
1470
118k
            opj_aligned_free(t1->data);
1471
118k
            t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1472
118k
            if (!t1->data) {
1473
                /* FIXME event manager error callback */
                /* NOTE(review): t1->data is NULL here while t1->datasize */
                /* keeps its previous value - confirm t1 is not reused */
                /* after this failure */
1474
0
                return OPJ_FALSE;
1475
0
            }
1476
118k
            t1->datasize = datasize;
1477
118k
        }
1478
        /* memset first arg is declared to never be null by gcc */
1479
35.0M
        if (t1->data != NULL) {
1480
35.0M
            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1481
35.0M
        }
1482
35.0M
    }
1483
1484
0
    flags_stride = w + 2U; /* can't be 0U */
1485
1486
35.0M
    /* one flag row per stripe of 4 sample rows, plus 2 extra rows */
    flagssize = (h + 3U) / 4U + 2U;
1487
1488
35.0M
    flagssize *= flags_stride;
1489
35.0M
    {
1490
35.0M
        opj_flag_t* p;
1491
35.0M
        OPJ_UINT32 x;
1492
35.0M
        OPJ_UINT32 flags_height = (h + 3U) / 4U;
1493
1494
35.0M
        if (flagssize > t1->flagssize) {
1495
1496
1.37M
            opj_aligned_free(t1->flags);
1497
1.37M
            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1498
1.37M
                            opj_flag_t));
1499
1.37M
            if (!t1->flags) {
1500
                /* FIXME event manager error callback */
                /* NOTE(review): t1->flags is NULL here while t1->flagssize */
                /* keeps its previous value; the memset below is not */
                /* NULL-guarded - confirm t1 is not reused after this failure */
1501
0
                return OPJ_FALSE;
1502
0
            }
1503
1.37M
        }
1504
35.0M
        /* records the size in use, which may be smaller than the allocation */
        t1->flagssize = flagssize;
1505
1506
35.0M
        memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1507
1508
35.0M
        /* first flag row */
        p = &t1->flags[0];
1509
694M
        for (x = 0; x < flags_stride; ++x) {
1510
            /* magic value to hopefully stop any passes being interested in this entry */
1511
659M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1512
659M
        }
1513
1514
35.0M
        /* last flag row */
        p = &t1->flags[((flags_height + 1) * flags_stride)];
1515
694M
        for (x = 0; x < flags_stride; ++x) {
1516
            /* magic value to hopefully stop any passes being interested in this entry */
1517
659M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1518
659M
        }
1519
1520
35.0M
        /* partial last stripe: set the PI bits of the samples that do not exist */
        if (h % 4) {
1521
11.4M
            OPJ_UINT32 v = 0;
1522
11.4M
            p = &t1->flags[((flags_height) * flags_stride)];
1523
11.4M
            if (h % 4 == 1) {
1524
7.26M
                v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1525
7.26M
            } else if (h % 4 == 2) {
1526
3.31M
                v |= T1_PI_2 | T1_PI_3;
1527
3.31M
            } else if (h % 4 == 3) {
1528
851k
                v |= T1_PI_3;
1529
851k
            }
1530
166M
            for (x = 0; x < flags_stride; ++x) {
1531
155M
                *p++ = v;
1532
155M
            }
1533
11.4M
        }
1534
35.0M
    }
1535
1536
0
    t1->w = w;
1537
35.0M
    t1->h = h;
1538
1539
35.0M
    return OPJ_TRUE;
1540
35.0M
}
1541
1542
/* ----------------------------------------------------------------------- */
1543
1544
/* ----------------------------------------------------------------------- */
1545
/**
1546
 * Creates a new Tier 1 handle.
 * The handle is zero-initialised (opj_calloc) and its encoder field is set
 * from isEncoder; no data or flag buffer is allocated here.
1547
 * NOTE(review): earlier wording said this also initializes the look-up
 * tables of the Tier-1 coder/decoder; the visible body does not do so.
 * @param isEncoder value stored in the encoder field of the handle
1548
 * @return a new T1 handle if successful, returns NULL otherwise
1549
*/
1550
opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1551
49.0k
{
1552
49.0k
    opj_t1_t *l_t1 = 00;
1553
1554
49.0k
    l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1555
49.0k
    if (!l_t1) {
1556
0
        return 00;
1557
0
    }
1558
1559
49.0k
    l_t1->encoder = isEncoder;
1560
1561
49.0k
    return l_t1;
1562
49.0k
}
1563
1564
1565
/**
1566
 * Destroys a previously created T1 handle.
 * Releases the data and flag buffers (opj_aligned_free), the cblkdatabuffer
 * (opj_free) and finally the handle itself. A NULL handle is ignored.
1567
 *
1568
 * @param p_t1 Tier 1 handle to destroy
1569
*/
1570
void opj_t1_destroy(opj_t1_t *p_t1)
1571
49.0k
{
1572
49.0k
    if (! p_t1) {
1573
0
        return;
1574
0
    }
1575
1576
49.0k
    if (p_t1->data) {
1577
48.9k
        opj_aligned_free(p_t1->data);
1578
48.9k
        p_t1->data = 00;
1579
48.9k
    }
1580
1581
49.0k
    if (p_t1->flags) {
1582
49.0k
        opj_aligned_free(p_t1->flags);
1583
49.0k
        p_t1->flags = 00;
1584
49.0k
    }
1585
1586
49.0k
    opj_free(p_t1->cblkdatabuffer);
1587
1588
49.0k
    opj_free(p_t1);
1589
49.0k
}
1590
1591
/* Description of one code-block decoding job handed to the thread pool. */
/* It is allocated by opj_t1_decode_cblks() and freed by */
/* opj_t1_clbl_decode_processor() once the job has been handled. */
typedef struct {
1592
    /* Copy of tcd->whole_tile_decoding. When false, the processor decodes */
    /* into a freshly allocated cblk->decoded_data buffer. */
    OPJ_BOOL whole_tile_decoding;
1593
    /* Index of the resolution the code-block belongs to */
    OPJ_UINT32 resno;
1594
    /* Code-block to decode */
    opj_tcd_cblk_dec_t* cblk;
1595
    /* Band containing the code-block */
    opj_tcd_band_t* band;
1596
    /* Tile component containing the band */
    opj_tcd_tilecomp_t* tilec;
1597
    /* Tile component coding parameters (cblksty, roishift, qmfbid are read) */
    opj_tccp_t* tccp;
1598
    /* Set when the thread pool has more than one thread; copied to the */
    /* T1 handle before decoding */
    OPJ_BOOL mustuse_cblkdatabuffer;
1599
    /* Shared status flag: set to OPJ_FALSE by a job that fails, and */
    /* checked by jobs before they start decoding */
    volatile OPJ_BOOL* pret;
1600
    /* Event manager used to report errors and warnings */
    opj_event_mgr_t *p_manager;
1601
    /* When not NULL, locked around calls to opj_event_msg() */
    opj_mutex_t* p_manager_mutex;
1602
    /* Forwarded to the code-block decoding functions (PTERM check) */
    OPJ_BOOL check_pterm;
1603
} opj_t1_cblk_decode_processing_job_t;
1604
1605
static void opj_t1_destroy_wrapper(void* t1)
1606
49.0k
{
1607
49.0k
    opj_t1_destroy((opj_t1_t*) t1);
1608
49.0k
}
1609
1610
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1611
37.4M
{
1612
37.4M
    opj_tcd_cblk_dec_t* cblk;
1613
37.4M
    opj_tcd_band_t* band;
1614
37.4M
    opj_tcd_tilecomp_t* tilec;
1615
37.4M
    opj_tccp_t* tccp;
1616
37.4M
    OPJ_INT32* OPJ_RESTRICT datap;
1617
37.4M
    OPJ_UINT32 cblk_w, cblk_h;
1618
37.4M
    OPJ_INT32 x, y;
1619
37.4M
    OPJ_UINT32 i, j;
1620
37.4M
    opj_t1_cblk_decode_processing_job_t* job;
1621
37.4M
    opj_t1_t* t1;
1622
37.4M
    OPJ_UINT32 resno;
1623
37.4M
    OPJ_UINT32 tile_w;
1624
1625
37.4M
    job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1626
1627
37.4M
    cblk = job->cblk;
1628
1629
37.4M
    if (!job->whole_tile_decoding) {
1630
21.2M
        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1631
21.2M
        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1632
1633
21.2M
        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1634
21.2M
                             cblk_w * cblk_h);
1635
21.2M
        if (cblk->decoded_data == NULL) {
1636
0
            if (job->p_manager_mutex) {
1637
0
                opj_mutex_lock(job->p_manager_mutex);
1638
0
            }
1639
0
            opj_event_msg(job->p_manager, EVT_ERROR,
1640
0
                          "Cannot allocate cblk->decoded_data\n");
1641
0
            if (job->p_manager_mutex) {
1642
0
                opj_mutex_unlock(job->p_manager_mutex);
1643
0
            }
1644
0
            *(job->pret) = OPJ_FALSE;
1645
0
            opj_free(job);
1646
0
            return;
1647
0
        }
1648
        /* Zero-init required */
1649
21.2M
        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1650
21.2M
    } else if (cblk->decoded_data) {
1651
        /* Not sure if that code path can happen, but better be */
1652
        /* safe than sorry */
1653
0
        opj_aligned_free(cblk->decoded_data);
1654
0
        cblk->decoded_data = NULL;
1655
0
    }
1656
1657
37.4M
    resno = job->resno;
1658
37.4M
    band = job->band;
1659
37.4M
    tilec = job->tilec;
1660
37.4M
    tccp = job->tccp;
1661
37.4M
    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1662
37.4M
                          -
1663
37.4M
                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1664
1665
37.4M
    if (!*(job->pret)) {
1666
0
        opj_free(job);
1667
0
        return;
1668
0
    }
1669
1670
37.4M
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1671
37.4M
    if (t1 == NULL) {
1672
37.2k
        t1 = opj_t1_create(OPJ_FALSE);
1673
37.2k
        if (t1 == NULL) {
1674
0
            opj_event_msg(job->p_manager, EVT_ERROR,
1675
0
                          "Cannot allocate Tier 1 handle\n");
1676
0
            *(job->pret) = OPJ_FALSE;
1677
0
            opj_free(job);
1678
0
            return;
1679
0
        }
1680
37.2k
        if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
1681
0
            opj_event_msg(job->p_manager, EVT_ERROR,
1682
0
                          "Unable to set t1 handle as TLS\n");
1683
0
            opj_t1_destroy(t1);
1684
0
            *(job->pret) = OPJ_FALSE;
1685
0
            opj_free(job);
1686
0
            return;
1687
0
        }
1688
37.2k
    }
1689
37.4M
    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1690
1691
37.4M
    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
1692
4.73M
        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
1693
4.73M
                    t1,
1694
4.73M
                    cblk,
1695
4.73M
                    band->bandno,
1696
4.73M
                    (OPJ_UINT32)tccp->roishift,
1697
4.73M
                    tccp->cblksty,
1698
4.73M
                    job->p_manager,
1699
4.73M
                    job->p_manager_mutex,
1700
4.73M
                    job->check_pterm)) {
1701
2.00k
            *(job->pret) = OPJ_FALSE;
1702
2.00k
            opj_free(job);
1703
2.00k
            return;
1704
2.00k
        }
1705
32.7M
    } else {
1706
32.7M
        if (OPJ_FALSE == opj_t1_decode_cblk(
1707
32.7M
                    t1,
1708
32.7M
                    cblk,
1709
32.7M
                    band->bandno,
1710
32.7M
                    (OPJ_UINT32)tccp->roishift,
1711
32.7M
                    tccp->cblksty,
1712
32.7M
                    job->p_manager,
1713
32.7M
                    job->p_manager_mutex,
1714
32.7M
                    job->check_pterm)) {
1715
250
            *(job->pret) = OPJ_FALSE;
1716
250
            opj_free(job);
1717
250
            return;
1718
250
        }
1719
32.7M
    }
1720
1721
37.4M
    x = cblk->x0 - band->x0;
1722
37.4M
    y = cblk->y0 - band->y0;
1723
37.4M
    if (band->bandno & 1) {
1724
7.58M
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1725
7.58M
        x += pres->x1 - pres->x0;
1726
7.58M
    }
1727
37.4M
    if (band->bandno & 2) {
1728
7.69M
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1729
7.69M
        y += pres->y1 - pres->y0;
1730
7.69M
    }
1731
1732
37.4M
    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1733
37.4M
    cblk_w = t1->w;
1734
37.4M
    cblk_h = t1->h;
1735
1736
37.4M
    if (tccp->roishift) {
1737
1.38M
        if (tccp->roishift >= 31) {
1738
20.4k
            for (j = 0; j < cblk_h; ++j) {
1739
545k
                for (i = 0; i < cblk_w; ++i) {
1740
525k
                    datap[(j * cblk_w) + i] = 0;
1741
525k
                }
1742
19.8k
            }
1743
1.38M
        } else {
1744
1.38M
            OPJ_INT32 thresh = 1 << tccp->roishift;
1745
5.15M
            for (j = 0; j < cblk_h; ++j) {
1746
98.2M
                for (i = 0; i < cblk_w; ++i) {
1747
94.4M
                    OPJ_INT32 val = datap[(j * cblk_w) + i];
1748
94.4M
                    OPJ_INT32 mag = abs(val);
1749
94.4M
                    if (mag >= thresh) {
1750
10.2M
                        mag >>= tccp->roishift;
1751
10.2M
                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1752
10.2M
                    }
1753
94.4M
                }
1754
3.77M
            }
1755
1.38M
        }
1756
1.38M
    }
1757
1758
    /* Both can be non NULL if for example decoding a full tile and then */
1759
    /* partially a tile. In which case partial decoding should be the */
1760
    /* priority */
1761
37.4M
    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1762
1763
37.4M
    if (cblk->decoded_data) {
1764
21.2M
        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1765
21.2M
        if (tccp->qmfbid == 1) {
1766
4.15G
            for (i = 0; i < cblk_size; ++i) {
1767
4.14G
                datap[i] /= 2;
1768
4.14G
            }
1769
12.5M
        } else {        /* if (tccp->qmfbid == 0) */
1770
8.68M
            const float stepsize = 0.5f * band->stepsize;
1771
8.68M
            i = 0;
1772
8.68M
#ifdef __SSE2__
1773
8.68M
            {
1774
8.68M
                const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
1775
76.6M
                for (; i < (cblk_size & ~15U); i += 16) {
1776
68.0M
                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1777
68.0M
                                                           datap + 0)));
1778
68.0M
                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1779
68.0M
                                                           datap + 4)));
1780
68.0M
                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1781
68.0M
                                                           datap + 8)));
1782
68.0M
                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1783
68.0M
                                                           datap + 12)));
1784
68.0M
                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1785
68.0M
                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1786
68.0M
                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1787
68.0M
                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1788
68.0M
                    datap += 16;
1789
68.0M
                }
1790
8.68M
            }
1791
8.68M
#endif
1792
22.2M
            for (; i < cblk_size; ++i) {
1793
13.5M
                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
1794
13.5M
                memcpy(datap, &tmp, sizeof(tmp));
1795
13.5M
                datap++;
1796
13.5M
            }
1797
8.68M
        }
1798
21.2M
    } else if (tccp->qmfbid == 1) {
1799
9.46M
        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1800
9.46M
                                                       (OPJ_SIZE_T)x];
1801
203M
        for (j = 0; j < cblk_h; ++j) {
1802
            //positive -> round down aka.  (83)/2 =  41.5 ->  41
1803
            //negative -> round up   aka. (-83)/2 = -41.5 -> -41
1804
#if defined(__AVX512F__)
1805
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
1806
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
1807
            for (i = 0; i < cblk_w / 16; ++i) {
1808
                __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
1809
                const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
1810
                in_avx = _mm512_add_epi32(in_avx, add_avx);
1811
                _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
1812
                ptr_in += 16;
1813
                ptr_out += 16;
1814
            }
1815
1816
            for (i = 0; i < cblk_w % 16; ++i) {
1817
                ptr_out[i] = ptr_in[i] / 2;
1818
            }
1819
#elif defined(__AVX2__)
1820
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
1821
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
1822
            for (i = 0; i < cblk_w / 8; ++i) {
1823
                __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
1824
                const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
1825
                in_avx = _mm256_add_epi32(in_avx, add_avx);
1826
                _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
1827
                ptr_in += 8;
1828
                ptr_out += 8;
1829
            }
1830
1831
            for (i = 0; i < cblk_w % 8; ++i) {
1832
                ptr_out[i] = ptr_in[i] / 2;
1833
            }
1834
#else
1835
194M
            i = 0;
1836
1.57G
            for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1837
1.37G
                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1838
1.37G
                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1839
1.37G
                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1840
1.37G
                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1841
1.37G
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1842
1.37G
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1843
1.37G
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1844
1.37G
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1845
1.37G
            }
1846
266M
            for (; i < cblk_w; ++i) {
1847
72.1M
                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1848
72.1M
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1849
72.1M
            }
1850
194M
#endif
1851
194M
        }
1852
9.46M
    } else {        /* if (tccp->qmfbid == 0) */
1853
6.70M
        const float stepsize = 0.5f * band->stepsize;
1854
6.70M
        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1855
6.70M
                                                         tile_w + (OPJ_SIZE_T)x];
1856
58.2M
        for (j = 0; j < cblk_h; ++j) {
1857
51.5M
            OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1858
1.54G
            for (i = 0; i < cblk_w; ++i) {
1859
1.49G
                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
1860
1.49G
                *tiledp2 = tmp;
1861
1.49G
                datap++;
1862
1.49G
                tiledp2++;
1863
1.49G
            }
1864
51.5M
            tiledp += tile_w;
1865
51.5M
        }
1866
6.70M
    }
1867
1868
37.4M
    opj_free(job);
1869
37.4M
}
1870
1871
1872
void opj_t1_decode_cblks(opj_tcd_t* tcd,
1873
                         volatile OPJ_BOOL* pret,
1874
                         opj_tcd_tilecomp_t* tilec,
1875
                         opj_tccp_t* tccp,
1876
                         opj_event_mgr_t *p_manager,
1877
                         opj_mutex_t* p_manager_mutex,
1878
                         OPJ_BOOL check_pterm
1879
                        )
1880
222k
{
1881
222k
    opj_thread_pool_t* tp = tcd->thread_pool;
1882
222k
    OPJ_UINT32 resno, bandno, precno, cblkno;
1883
1884
#ifdef DEBUG_VERBOSE
1885
    OPJ_UINT32 codeblocks_decoded = 0;
1886
    printf("Enter opj_t1_decode_cblks()\n");
1887
#endif
1888
1889
2.24M
    for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1890
2.02M
        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1891
1892
7.66M
        for (bandno = 0; bandno < res->numbands; ++bandno) {
1893
5.63M
            opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1894
1895
38.8M
            for (precno = 0; precno < res->pw * res->ph; ++precno) {
1896
33.2M
                opj_tcd_precinct_t* precinct = &band->precincts[precno];
1897
1898
33.2M
                if (!opj_tcd_is_subband_area_of_interest(tcd,
1899
33.2M
                        tilec->compno,
1900
33.2M
                        resno,
1901
33.2M
                        band->bandno,
1902
33.2M
                        (OPJ_UINT32)precinct->x0,
1903
33.2M
                        (OPJ_UINT32)precinct->y0,
1904
33.2M
                        (OPJ_UINT32)precinct->x1,
1905
33.2M
                        (OPJ_UINT32)precinct->y1)) {
1906
20.0M
                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1907
9.94M
                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1908
9.94M
                        if (cblk->decoded_data) {
1909
#ifdef DEBUG_VERBOSE
1910
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1911
                                   cblk->x0, cblk->y0, resno, bandno);
1912
#endif
1913
0
                            opj_aligned_free(cblk->decoded_data);
1914
0
                            cblk->decoded_data = NULL;
1915
0
                        }
1916
9.94M
                    }
1917
10.0M
                    continue;
1918
10.0M
                }
1919
1920
63.4M
                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1921
40.3M
                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1922
40.3M
                    opj_t1_cblk_decode_processing_job_t* job;
1923
1924
40.3M
                    if (!opj_tcd_is_subband_area_of_interest(tcd,
1925
40.3M
                            tilec->compno,
1926
40.3M
                            resno,
1927
40.3M
                            band->bandno,
1928
40.3M
                            (OPJ_UINT32)cblk->x0,
1929
40.3M
                            (OPJ_UINT32)cblk->y0,
1930
40.3M
                            (OPJ_UINT32)cblk->x1,
1931
40.3M
                            (OPJ_UINT32)cblk->y1)) {
1932
2.62M
                        if (cblk->decoded_data) {
1933
#ifdef DEBUG_VERBOSE
1934
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1935
                                   cblk->x0, cblk->y0, resno, bandno);
1936
#endif
1937
0
                            opj_aligned_free(cblk->decoded_data);
1938
0
                            cblk->decoded_data = NULL;
1939
0
                        }
1940
2.62M
                        continue;
1941
2.62M
                    }
1942
1943
37.6M
                    if (!tcd->whole_tile_decoding) {
1944
21.5M
                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1945
21.5M
                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1946
21.5M
                        if (cblk->decoded_data != NULL) {
1947
#ifdef DEBUG_VERBOSE
1948
                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1949
                                   cblk->x0, cblk->y0, resno, bandno);
1950
#endif
1951
0
                            continue;
1952
0
                        }
1953
21.5M
                        if (cblk_w == 0 || cblk_h == 0) {
1954
228k
                            continue;
1955
228k
                        }
1956
#ifdef DEBUG_VERBOSE
1957
                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1958
                               cblk->x0, cblk->y0, resno, bandno);
1959
#endif
1960
21.5M
                    }
1961
1962
37.4M
                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1963
37.4M
                            sizeof(opj_t1_cblk_decode_processing_job_t));
1964
37.4M
                    if (!job) {
1965
0
                        *pret = OPJ_FALSE;
1966
0
                        return;
1967
0
                    }
1968
37.4M
                    job->whole_tile_decoding = tcd->whole_tile_decoding;
1969
37.4M
                    job->resno = resno;
1970
37.4M
                    job->cblk = cblk;
1971
37.4M
                    job->band = band;
1972
37.4M
                    job->tilec = tilec;
1973
37.4M
                    job->tccp = tccp;
1974
37.4M
                    job->pret = pret;
1975
37.4M
                    job->p_manager_mutex = p_manager_mutex;
1976
37.4M
                    job->p_manager = p_manager;
1977
37.4M
                    job->check_pterm = check_pterm;
1978
37.4M
                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1979
37.4M
                    opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1980
#ifdef DEBUG_VERBOSE
1981
                    codeblocks_decoded ++;
1982
#endif
1983
37.4M
                    if (!(*pret)) {
1984
2.25k
                        return;
1985
2.25k
                    }
1986
37.4M
                } /* cblkno */
1987
23.1M
            } /* precno */
1988
5.63M
        } /* bandno */
1989
2.02M
    } /* resno */
1990
1991
#ifdef DEBUG_VERBOSE
1992
    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1993
#endif
1994
219k
    return;
1995
222k
}
1996
1997
1998
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1999
                                   opj_tcd_cblk_dec_t* cblk,
2000
                                   OPJ_UINT32 orient,
2001
                                   OPJ_UINT32 roishift,
2002
                                   OPJ_UINT32 cblksty,
2003
                                   opj_event_mgr_t *p_manager,
2004
                                   opj_mutex_t* p_manager_mutex,
2005
                                   OPJ_BOOL check_pterm)
2006
32.7M
{
2007
32.7M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2008
2009
32.7M
    OPJ_INT32 bpno_plus_one;
2010
32.7M
    OPJ_UINT32 passtype;
2011
32.7M
    OPJ_UINT32 segno, passno;
2012
32.7M
    OPJ_BYTE* cblkdata = NULL;
2013
32.7M
    OPJ_UINT32 cblkdataindex = 0;
2014
32.7M
    OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
2015
32.7M
    OPJ_INT32* original_t1_data = NULL;
2016
2017
32.7M
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2018
2019
32.7M
    if (!opj_t1_allocate_buffers(
2020
32.7M
                t1,
2021
32.7M
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2022
32.7M
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2023
0
        return OPJ_FALSE;
2024
0
    }
2025
2026
32.7M
    bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
2027
32.7M
    if (bpno_plus_one >= 31) {
2028
250
        if (p_manager_mutex) {
2029
250
            opj_mutex_lock(p_manager_mutex);
2030
250
        }
2031
250
        opj_event_msg(p_manager, EVT_WARNING,
2032
250
                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
2033
250
                      bpno_plus_one);
2034
250
        if (p_manager_mutex) {
2035
250
            opj_mutex_unlock(p_manager_mutex);
2036
250
        }
2037
250
        return OPJ_FALSE;
2038
250
    }
2039
32.7M
    passtype = 2;
2040
2041
32.7M
    opj_mqc_resetstates(mqc);
2042
32.7M
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2043
32.7M
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2044
32.7M
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2045
2046
32.7M
    if (cblk->corrupted) {
2047
0
        assert(cblk->numchunks == 0);
2048
0
        return OPJ_TRUE;
2049
0
    }
2050
2051
    /* Even if we have a single chunk, in multi-threaded decoding */
2052
    /* the insertion of our synthetic marker might potentially override */
2053
    /* valid codestream of other codeblocks decoded in parallel. */
2054
32.7M
    if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
2055
46.1k
                                cblk->numchunks > 0)) {
2056
46.1k
        OPJ_UINT32 i;
2057
46.1k
        OPJ_UINT32 cblk_len;
2058
2059
        /* Compute whole codeblock length from chunk lengths */
2060
46.1k
        cblk_len = 0;
2061
303k
        for (i = 0; i < cblk->numchunks; i++) {
2062
257k
            cblk_len += cblk->chunks[i].len;
2063
257k
        }
2064
2065
        /* Allocate temporary memory if needed */
2066
46.1k
        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
2067
12.7k
            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
2068
12.7k
                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
2069
12.7k
            if (cblkdata == NULL) {
2070
0
                return OPJ_FALSE;
2071
0
            }
2072
12.7k
            t1->cblkdatabuffer = cblkdata;
2073
12.7k
            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2074
12.7k
            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2075
12.7k
        }
2076
2077
        /* Concatenate all chunks */
2078
46.1k
        cblkdata = t1->cblkdatabuffer;
2079
46.1k
        cblk_len = 0;
2080
303k
        for (i = 0; i < cblk->numchunks; i++) {
2081
257k
            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2082
257k
            cblk_len += cblk->chunks[i].len;
2083
257k
        }
2084
32.6M
    } else if (cblk->numchunks == 1) {
2085
422k
        cblkdata = cblk->chunks[0].data;
2086
32.2M
    } else {
2087
        /* Not sure if that can happen in practice, but avoid Coverity to */
2088
        /* think we will dereference a null cblkdta pointer */
2089
32.2M
        return OPJ_TRUE;
2090
32.2M
    }
2091
2092
    /* For subtile decoding, directly decode in the decoded_data buffer of */
2093
    /* the code-block. Hack t1->data to point to it, and restore it later */
2094
468k
    if (cblk->decoded_data) {
2095
235k
        original_t1_data = t1->data;
2096
235k
        t1->data = cblk->decoded_data;
2097
235k
    }
2098
2099
1.11M
    for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2100
641k
        opj_tcd_seg_t *seg = &cblk->segs[segno];
2101
2102
        /* BYPASS mode */
2103
641k
        type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2104
578k
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2105
2106
641k
        if (type == T1_TYPE_RAW) {
2107
63.2k
            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2108
63.2k
                                 OPJ_COMMON_CBLK_DATA_EXTRA);
2109
578k
        } else {
2110
578k
            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2111
578k
                             OPJ_COMMON_CBLK_DATA_EXTRA);
2112
578k
        }
2113
641k
        cblkdataindex += seg->len;
2114
2115
6.44M
        for (passno = 0; (passno < seg->real_num_passes) &&
2116
5.87M
                (bpno_plus_one >= 1); ++passno) {
2117
5.79M
            switch (passtype) {
2118
1.85M
            case 0:
2119
1.85M
                if (type == T1_TYPE_RAW) {
2120
34.1k
                    opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2121
1.81M
                } else {
2122
1.81M
                    opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2123
1.81M
                }
2124
1.85M
                break;
2125
1.75M
            case 1:
2126
1.75M
                if (type == T1_TYPE_RAW) {
2127
32.9k
                    opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2128
1.72M
                } else {
2129
1.72M
                    opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2130
1.72M
                }
2131
1.75M
                break;
2132
2.19M
            case 2:
2133
2.19M
                opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2134
2.19M
                break;
2135
5.79M
            }
2136
2137
5.79M
            if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2138
3.87M
                opj_mqc_resetstates(mqc);
2139
3.87M
                opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2140
3.87M
                opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2141
3.87M
                opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2142
3.87M
            }
2143
5.79M
            if (++passtype == 3) {
2144
2.19M
                passtype = 0;
2145
2.19M
                bpno_plus_one--;
2146
2.19M
            }
2147
5.79M
        }
2148
2149
641k
        opq_mqc_finish_dec(mqc);
2150
641k
    }
2151
2152
468k
    if (check_pterm) {
2153
158k
        if (mqc->bp + 2 < mqc->end) {
2154
13.1k
            if (p_manager_mutex) {
2155
13.1k
                opj_mutex_lock(p_manager_mutex);
2156
13.1k
            }
2157
13.1k
            opj_event_msg(p_manager, EVT_WARNING,
2158
13.1k
                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2159
13.1k
                          (int)(mqc->end - mqc->bp) - 2,
2160
13.1k
                          (int)(mqc->bp - mqc->start),
2161
13.1k
                          (int)(mqc->end - mqc->start));
2162
13.1k
            if (p_manager_mutex) {
2163
13.1k
                opj_mutex_unlock(p_manager_mutex);
2164
13.1k
            }
2165
145k
        } else if (mqc->end_of_byte_stream_counter > 2) {
2166
125k
            if (p_manager_mutex) {
2167
125k
                opj_mutex_lock(p_manager_mutex);
2168
125k
            }
2169
125k
            opj_event_msg(p_manager, EVT_WARNING,
2170
125k
                          "PTERM check failure: %d synthesized 0xFF markers read\n",
2171
125k
                          mqc->end_of_byte_stream_counter);
2172
125k
            if (p_manager_mutex) {
2173
125k
                opj_mutex_unlock(p_manager_mutex);
2174
125k
            }
2175
125k
        }
2176
158k
    }
2177
2178
    /* Restore original t1->data is needed */
2179
468k
    if (cblk->decoded_data) {
2180
235k
        t1->data = original_t1_data;
2181
235k
    }
2182
2183
468k
    return OPJ_TRUE;
2184
468k
}
2185
2186
2187
/* Description of one code-block encoding job, consumed (and freed) by */
/* opj_t1_cblk_encode_processor(). */
typedef struct {
2188
    /* Component number -- presumably the index of tilec in the tile */
    /* (TODO confirm against the code filling the job) */
    OPJ_UINT32 compno;
2189
    /* Index of the resolution the code-block belongs to */
    OPJ_UINT32 resno;
2190
    /* Code-block to encode */
    opj_tcd_cblk_enc_t* cblk;
2191
    /* Tile being encoded */
    opj_tcd_tile_t *tile;
2192
    /* Band containing the code-block */
    opj_tcd_band_t* band;
2193
    /* Tile component containing the band; its data buffer holds the */
    /* samples to encode */
    opj_tcd_tilecomp_t* tilec;
2194
    /* Tile component coding parameters */
    opj_tccp_t* tccp;
2195
    /* MCT norms -- presumably one value per component (TODO confirm) */
    const OPJ_FLOAT64 * mct_norms;
2196
    /* Presumably the number of entries of mct_norms (TODO confirm) */
    OPJ_UINT32 mct_numcomps;
2197
    /* Shared status flag: set to OPJ_FALSE by a job that fails, and */
    /* checked by jobs before they start encoding */
    volatile OPJ_BOOL* pret;
2198
    /* Mutex shared by the encoding jobs; what it protects is not visible */
    /* from this declaration (NOTE(review): check the processor) */
    opj_mutex_t* mutex;
2199
} opj_t1_cblk_encode_processing_job_t;
2200
2201
/** Procedure to deal with an asynchronous code-block encoding job.
2202
 *
2203
 * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2204
 * @param tls       TLS handle.
2205
 */
2206
static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
2207
2.37M
{
2208
2.37M
    opj_t1_cblk_encode_processing_job_t* job =
2209
2.37M
        (opj_t1_cblk_encode_processing_job_t*)user_data;
2210
2.37M
    opj_tcd_cblk_enc_t* cblk = job->cblk;
2211
2.37M
    const opj_tcd_band_t* band = job->band;
2212
2.37M
    const opj_tcd_tilecomp_t* tilec = job->tilec;
2213
2.37M
    const opj_tccp_t* tccp = job->tccp;
2214
2.37M
    const OPJ_UINT32 resno = job->resno;
2215
2.37M
    opj_t1_t* t1;
2216
2.37M
    const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2217
2218
2.37M
    OPJ_INT32* OPJ_RESTRICT tiledp;
2219
2.37M
    OPJ_UINT32 cblk_w;
2220
2.37M
    OPJ_UINT32 cblk_h;
2221
2.37M
    OPJ_UINT32 i, j;
2222
2223
2.37M
    OPJ_INT32 x = cblk->x0 - band->x0;
2224
2.37M
    OPJ_INT32 y = cblk->y0 - band->y0;
2225
2226
2.37M
    if (!*(job->pret)) {
2227
0
        opj_free(job);
2228
0
        return;
2229
0
    }
2230
2231
2.37M
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2232
2.37M
    if (t1 == NULL) {
2233
11.7k
        t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2234
11.7k
        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2235
11.7k
    }
2236
2237
2.37M
    if (band->bandno & 1) {
2238
1.55M
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2239
1.55M
        x += pres->x1 - pres->x0;
2240
1.55M
    }
2241
2.37M
    if (band->bandno & 2) {
2242
1.56M
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2243
1.56M
        y += pres->y1 - pres->y0;
2244
1.56M
    }
2245
2246
2.37M
    if (!opj_t1_allocate_buffers(
2247
2.37M
                t1,
2248
2.37M
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2249
2.37M
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2250
0
        *(job->pret) = OPJ_FALSE;
2251
0
        opj_free(job);
2252
0
        return;
2253
0
    }
2254
2255
2.37M
    cblk_w = t1->w;
2256
2.37M
    cblk_h = t1->h;
2257
2258
2.37M
    tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2259
2260
2.37M
    if (tccp->qmfbid == 1) {
2261
        /* Do multiplication on unsigned type, even if the
2262
            * underlying type is signed, to avoid potential
2263
            * int overflow on large value (the output will be
2264
            * incorrect in such situation, but whatever...)
2265
            * This assumes complement-to-2 signed integer
2266
            * representation
2267
            * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2268
            */
2269
1.00M
        OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2270
1.00M
        OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2271
        /* Change from "natural" order to "zigzag" order of T1 passes */
2272
13.9M
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2273
#if defined(__AVX512F__)
2274
            const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
2275
            const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
2276
            OPJ_UINT32* ptr = tiledp_u;
2277
            for (i = 0; i < cblk_w / 16; ++i) {
2278
                //                      INPUT                                        OUTPUT
2279
                // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F   00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
2280
                // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F   04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
2281
                // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F   08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
2282
                // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F   0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
2283
                __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2284
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2285
                __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2286
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2287
                __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2288
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2289
                __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2290
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2291
2292
                __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
2293
                __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
2294
                __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
2295
                __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
2296
2297
                in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
2298
                in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
2299
                in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
2300
                in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
2301
2302
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
2303
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
2304
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
2305
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
2306
2307
                tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
2308
                tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
2309
2310
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
2311
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
2312
                _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
2313
                                    0x1));
2314
                _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
2315
                                    0x1));
2316
                _mm256_storeu_si256((__m256i*)(t1data + 48),
2317
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
2318
                _mm256_storeu_si256((__m256i*)(t1data + 56),
2319
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
2320
                t1data += 64;
2321
                ptr += 16;
2322
            }
2323
            for (i = 0; i < cblk_w % 16; ++i) {
2324
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2325
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2326
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2327
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2328
                t1data += 4;
2329
                ptr += 1;
2330
            }
2331
#elif defined(__AVX2__)
2332
            OPJ_UINT32* ptr = tiledp_u;
2333
            for (i = 0; i < cblk_w / 8; ++i) {
2334
                //          INPUT                  OUTPUT
2335
                // 00 01 02 03 04 05 06 07   00 10 20 30 01 11 21 31
2336
                // 10 11 12 13 14 15 16 17   02 12 22 32 03 13 23 33
2337
                // 20 21 22 23 24 25 26 27   04 14 24 34 05 15 25 35
2338
                // 30 31 32 33 34 35 36 37   06 16 26 36 07 17 27 37
2339
                __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2340
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2341
                __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2342
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2343
                __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2344
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2345
                __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2346
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2347
2348
                __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
2349
                __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
2350
                __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
2351
                __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
2352
2353
                in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
2354
                in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
2355
                in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
2356
                in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
2357
2358
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
2359
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
2360
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
2361
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
2362
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
2363
                                    0x31));
2364
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
2365
                                    0x31));
2366
                t1data += 32;
2367
                ptr += 8;
2368
            }
2369
            for (i = 0; i < cblk_w % 8; ++i) {
2370
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2371
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2372
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2373
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2374
                t1data += 4;
2375
                ptr += 1;
2376
            }
2377
#else
2378
766M
            for (i = 0; i < cblk_w; ++i) {
2379
753M
                t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2380
753M
                t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2381
753M
                t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2382
753M
                t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2383
753M
                t1data += 4;
2384
753M
            }
2385
12.9M
#endif
2386
12.9M
        }
2387
1.00M
        if (j < cblk_h) {
2388
7.77M
            for (i = 0; i < cblk_w; ++i) {
2389
7.59M
                OPJ_UINT32 k;
2390
21.3M
                for (k = j; k < cblk_h; k++) {
2391
13.7M
                    t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2392
13.7M
                    t1data ++;
2393
13.7M
                }
2394
7.59M
            }
2395
181k
        }
2396
1.36M
    } else {        /* if (tccp->qmfbid == 0) */
2397
1.36M
        OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2398
1.36M
        OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2399
        /* Change from "natural" order to "zigzag" order of T1 passes */
2400
11.8M
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2401
335M
            for (i = 0; i < cblk_w; ++i) {
2402
325M
                t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2403
325M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2404
325M
                t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2405
325M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2406
325M
                t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2407
325M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2408
325M
                t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2409
325M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2410
325M
                t1data += 4;
2411
325M
            }
2412
10.5M
        }
2413
1.36M
        if (j < cblk_h) {
2414
1.71M
            for (i = 0; i < cblk_w; ++i) {
2415
1.66M
                OPJ_UINT32 k;
2416
5.59M
                for (k = j; k < cblk_h; k++) {
2417
3.92M
                    t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2418
3.92M
                                                      * (1 << T1_NMSEDEC_FRACBITS));
2419
3.92M
                    t1data ++;
2420
3.92M
                }
2421
1.66M
            }
2422
57.0k
        }
2423
1.36M
    }
2424
2425
2.37M
    {
2426
2.37M
        OPJ_FLOAT64 cumwmsedec =
2427
2.37M
            opj_t1_encode_cblk(
2428
2.37M
                t1,
2429
2.37M
                cblk,
2430
2.37M
                band->bandno,
2431
2.37M
                job->compno,
2432
2.37M
                tilec->numresolutions - 1 - resno,
2433
2.37M
                tccp->qmfbid,
2434
2.37M
                band->stepsize,
2435
2.37M
                tccp->cblksty,
2436
2.37M
                job->tile->numcomps,
2437
2.37M
                job->mct_norms,
2438
2.37M
                job->mct_numcomps);
2439
2.37M
        if (job->mutex) {
2440
2.37M
            opj_mutex_lock(job->mutex);
2441
2.37M
        }
2442
2.37M
        job->tile->distotile += cumwmsedec;
2443
2.37M
        if (job->mutex) {
2444
2.37M
            opj_mutex_unlock(job->mutex);
2445
2.37M
        }
2446
2.37M
    }
2447
2448
2.37M
    opj_free(job);
2449
2.37M
}
2450
2451
2452
OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2453
                             opj_tcd_tile_t *tile,
2454
                             opj_tcp_t *tcp,
2455
                             const OPJ_FLOAT64 * mct_norms,
2456
                             OPJ_UINT32 mct_numcomps
2457
                            )
2458
11.7k
{
2459
11.7k
    volatile OPJ_BOOL ret = OPJ_TRUE;
2460
11.7k
    opj_thread_pool_t* tp = tcd->thread_pool;
2461
11.7k
    OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2462
11.7k
    opj_mutex_t* mutex = opj_mutex_create();
2463
2464
11.7k
    tile->distotile = 0;
2465
2466
24.9k
    for (compno = 0; compno < tile->numcomps; ++compno) {
2467
13.1k
        opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2468
13.1k
        opj_tccp_t* tccp = &tcp->tccps[compno];
2469
2470
74.2k
        for (resno = 0; resno < tilec->numresolutions; ++resno) {
2471
61.0k
            opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2472
2473
217k
            for (bandno = 0; bandno < res->numbands; ++bandno) {
2474
156k
                opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2475
2476
                /* Skip empty bands */
2477
156k
                if (opj_tcd_is_band_empty(band)) {
2478
0
                    continue;
2479
0
                }
2480
419k
                for (precno = 0; precno < res->pw * res->ph; ++precno) {
2481
263k
                    opj_tcd_precinct_t *prc = &band->precincts[precno];
2482
2483
2.63M
                    for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2484
2.37M
                        opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2485
2486
2.37M
                        opj_t1_cblk_encode_processing_job_t* job =
2487
2.37M
                            (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2488
2.37M
                                    sizeof(opj_t1_cblk_encode_processing_job_t));
2489
2.37M
                        if (!job) {
2490
0
                            ret = OPJ_FALSE;
2491
0
                            goto end;
2492
0
                        }
2493
2.37M
                        job->compno = compno;
2494
2.37M
                        job->tile = tile;
2495
2.37M
                        job->resno = resno;
2496
2.37M
                        job->cblk = cblk;
2497
2.37M
                        job->band = band;
2498
2.37M
                        job->tilec = tilec;
2499
2.37M
                        job->tccp = tccp;
2500
2.37M
                        job->mct_norms = mct_norms;
2501
2.37M
                        job->mct_numcomps = mct_numcomps;
2502
2.37M
                        job->pret = &ret;
2503
2.37M
                        job->mutex = mutex;
2504
2.37M
                        opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
2505
2506
2.37M
                    } /* cblkno */
2507
263k
                } /* precno */
2508
156k
            } /* bandno */
2509
61.0k
        } /* resno  */
2510
13.1k
    } /* compno  */
2511
2512
11.7k
end:
2513
11.7k
    opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2514
11.7k
    if (mutex) {
2515
11.7k
        opj_mutex_destroy(mutex);
2516
11.7k
    }
2517
2518
11.7k
    return ret;
2519
11.7k
}
2520
2521
/* Returns whether the pass (bpno, passtype) is terminated */
2522
static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2523
                                   OPJ_UINT32 cblksty,
2524
                                   OPJ_INT32 bpno,
2525
                                   OPJ_UINT32 passtype)
2526
25.2M
{
2527
    /* Is it the last cleanup pass ? */
2528
25.2M
    if (passtype == 2 && bpno == 0) {
2529
899k
        return OPJ_TRUE;
2530
899k
    }
2531
2532
24.3M
    if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2533
0
        return OPJ_TRUE;
2534
0
    }
2535
2536
24.3M
    if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2537
        /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2538
0
        if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2539
0
            return OPJ_TRUE;
2540
0
        }
2541
        /* and beyond terminate all the magnitude refinement passes (in raw) */
2542
        /* and cleanup passes (in MQC) */
2543
0
        if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2544
0
            return OPJ_TRUE;
2545
0
        }
2546
0
    }
2547
2548
24.3M
    return OPJ_FALSE;
2549
24.3M
}
2550
2551
2552
static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2553
                                      opj_tcd_cblk_enc_t* cblk,
2554
                                      OPJ_UINT32 orient,
2555
                                      OPJ_UINT32 compno,
2556
                                      OPJ_UINT32 level,
2557
                                      OPJ_UINT32 qmfbid,
2558
                                      OPJ_FLOAT64 stepsize,
2559
                                      OPJ_UINT32 cblksty,
2560
                                      OPJ_UINT32 numcomps,
2561
                                      const OPJ_FLOAT64 * mct_norms,
2562
                                      OPJ_UINT32 mct_numcomps)
2563
2.37M
{
2564
2.37M
    OPJ_FLOAT64 cumwmsedec = 0.0;
2565
2566
2.37M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2567
2568
2.37M
    OPJ_UINT32 passno;
2569
2.37M
    OPJ_INT32 bpno;
2570
2.37M
    OPJ_UINT32 passtype;
2571
2.37M
    OPJ_INT32 nmsedec = 0;
2572
2.37M
    OPJ_INT32 max;
2573
2.37M
    OPJ_UINT32 i, j;
2574
2.37M
    OPJ_BYTE type = T1_TYPE_MQ;
2575
2.37M
    OPJ_FLOAT64 tempwmsedec;
2576
2.37M
    OPJ_INT32* datap;
2577
2578
#ifdef EXTRA_DEBUG
2579
    printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2580
           cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2581
#endif
2582
2583
2.37M
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2584
2585
2.37M
    max = 0;
2586
2.37M
    datap = t1->data;
2587
96.7M
    for (j = 0; j < t1->h; ++j) {
2588
94.3M
        const OPJ_UINT32 w = t1->w;
2589
4.42G
        for (i = 0; i < w; ++i, ++datap) {
2590
4.33G
            OPJ_INT32 tmp = *datap;
2591
4.33G
            if (tmp < 0) {
2592
304M
                OPJ_UINT32 tmp_unsigned;
2593
304M
                if (tmp == INT_MIN) {
2594
                    /* To avoid undefined behaviour when negating INT_MIN */
2595
                    /* but if we go here, it means we have supplied an input */
2596
                    /* with more bit depth than we we can really support. */
2597
                    /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
2598
135k
                    tmp = INT_MIN + 1;
2599
135k
                }
2600
304M
                max = opj_int_max(max, -tmp);
2601
304M
                tmp_unsigned = opj_to_smr(tmp);
2602
304M
                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2603
4.02G
            } else {
2604
4.02G
                max = opj_int_max(max, tmp);
2605
4.02G
            }
2606
4.33G
        }
2607
94.3M
    }
2608
2609
2.37M
    cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2610
1.43M
                                      T1_NMSEDEC_FRACBITS) : 0;
2611
2.37M
    if (cblk->numbps == 0) {
2612
1.45M
        cblk->totalpasses = 0;
2613
1.45M
        return cumwmsedec;
2614
1.45M
    }
2615
2616
916k
    bpno = (OPJ_INT32)(cblk->numbps - 1);
2617
916k
    passtype = 2;
2618
2619
916k
    opj_mqc_resetstates(mqc);
2620
916k
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2621
916k
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2622
916k
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2623
916k
    opj_mqc_init_enc(mqc, cblk->data);
2624
2625
26.2M
    for (passno = 0; bpno >= 0; ++passno) {
2626
25.2M
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2627
25.2M
        type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2628
25.2M
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2629
2630
        /* If the previous pass was terminating, we need to reset the encoder */
2631
25.2M
        if (passno > 0 && cblk->passes[passno - 1].term) {
2632
0
            if (type == T1_TYPE_RAW) {
2633
0
                opj_mqc_bypass_init_enc(mqc);
2634
0
            } else {
2635
0
                opj_mqc_restart_init_enc(mqc);
2636
0
            }
2637
0
        }
2638
2639
25.2M
        switch (passtype) {
2640
8.12M
        case 0:
2641
8.12M
            opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2642
8.12M
            break;
2643
8.12M
        case 1:
2644
8.12M
            opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2645
8.12M
            break;
2646
9.02M
        case 2:
2647
9.02M
            opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2648
            /* code switch SEGMARK (i.e. SEGSYM) */
2649
9.02M
            if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2650
0
                opj_mqc_segmark_enc(mqc);
2651
0
            }
2652
9.02M
            break;
2653
25.2M
        }
2654
2655
25.2M
        tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2656
25.2M
                                        stepsize, numcomps, mct_norms, mct_numcomps) ;
2657
25.2M
        cumwmsedec += tempwmsedec;
2658
25.2M
        pass->distortiondec = cumwmsedec;
2659
2660
25.2M
        if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2661
            /* If it is a terminated pass, terminate it */
2662
899k
            if (type == T1_TYPE_RAW) {
2663
0
                opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2664
899k
            } else {
2665
899k
                if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2666
0
                    opj_mqc_erterm_enc(mqc);
2667
899k
                } else {
2668
899k
                    opj_mqc_flush(mqc);
2669
899k
                }
2670
899k
            }
2671
899k
            pass->term = 1;
2672
899k
            pass->rate = opj_mqc_numbytes(mqc);
2673
24.3M
        } else {
2674
            /* Non terminated pass */
2675
24.3M
            OPJ_UINT32 rate_extra_bytes;
2676
24.3M
            if (type == T1_TYPE_RAW) {
2677
0
                rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2678
0
                                       mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2679
24.3M
            } else {
2680
24.3M
                rate_extra_bytes = 3;
2681
24.3M
            }
2682
24.3M
            pass->term = 0;
2683
24.3M
            pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2684
24.3M
        }
2685
2686
25.2M
        if (++passtype == 3) {
2687
9.02M
            passtype = 0;
2688
9.02M
            bpno--;
2689
9.02M
        }
2690
2691
        /* Code-switch "RESET" */
2692
25.2M
        if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2693
0
            opj_mqc_reset_enc(mqc);
2694
0
        }
2695
25.2M
    }
2696
2697
916k
    cblk->totalpasses = passno;
2698
2699
916k
    if (cblk->totalpasses) {
2700
        /* Make sure that pass rates are increasing */
2701
899k
        OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2702
26.1M
        for (passno = cblk->totalpasses; passno > 0;) {
2703
25.2M
            opj_tcd_pass_t *pass = &cblk->passes[--passno];
2704
25.2M
            if (pass->rate > last_pass_rate) {
2705
564k
                pass->rate = last_pass_rate;
2706
24.7M
            } else {
2707
24.7M
                last_pass_rate = pass->rate;
2708
24.7M
            }
2709
25.2M
        }
2710
899k
    }
2711
2712
26.2M
    for (passno = 0; passno < cblk->totalpasses; passno++) {
2713
25.2M
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2714
2715
        /* Prevent generation of FF as last data byte of a pass*/
2716
        /* For terminating passes, the flushing procedure ensured this already */
2717
25.2M
        assert(pass->rate > 0);
2718
25.2M
        if (cblk->data[pass->rate - 1] == 0xFF) {
2719
221k
            pass->rate--;
2720
221k
        }
2721
25.2M
        pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2722
25.2M
    }
2723
2724
#ifdef EXTRA_DEBUG
2725
    printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2726
2727
    /* Check that there not 0xff >=0x90 sequences */
2728
    if (cblk->totalpasses) {
2729
        OPJ_UINT32 i;
2730
        OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2731
        for (i = 1; i < len; ++i) {
2732
            if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2733
                printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2734
                abort();
2735
            }
2736
        }
2737
    }
2738
#endif
2739
2740
916k
    return cumwmsedec;
2741
916k
}