Coverage Report

Created: 2025-10-12 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openjpeg/src/lib/openjp2/t1.c
Line
Count
Source
1
/*
2
 * The copyright in this software is being made available under the 2-clauses
3
 * BSD License, included below. This software may be subject to other third
4
 * party and contributor rights, including patent rights, and no such rights
5
 * are granted under this license.
6
 *
7
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8
 * Copyright (c) 2002-2014, Professor Benoit Macq
9
 * Copyright (c) 2001-2003, David Janssens
10
 * Copyright (c) 2002-2003, Yannick Verschueren
11
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
12
 * Copyright (c) 2003-2014, Antonin Descampe
13
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
14
 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15
 * Copyright (c) 2012, Carl Hetherington
16
 * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17
 * All rights reserved.
18
 *
19
 * Redistribution and use in source and binary forms, with or without
20
 * modification, are permitted provided that the following conditions
21
 * are met:
22
 * 1. Redistributions of source code must retain the above copyright
23
 *    notice, this list of conditions and the following disclaimer.
24
 * 2. Redistributions in binary form must reproduce the above copyright
25
 *    notice, this list of conditions and the following disclaimer in the
26
 *    documentation and/or other materials provided with the distribution.
27
 *
28
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
 * POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
#define OPJ_SKIP_POISON
42
#include "opj_includes.h"
43
44
#ifdef __SSE__
45
#include <xmmintrin.h>
46
#endif
47
#ifdef __SSE2__
48
#include <emmintrin.h>
49
#endif
50
#if (defined(__AVX2__) || defined(__AVX512F__))
51
#include <immintrin.h>
52
#endif
53
54
#if defined(__GNUC__)
55
#pragma GCC poison malloc calloc realloc free
56
#endif
57
58
#include "t1_luts.h"
59
60
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
61
/*@{*/
62
63
26.8M
/* Flag word that holds sample (x, y). One flag word covers 4 consecutive rows
 * of a column (hence y / 4); the +1 offsets and the (t1->w + 2) row stride
 * skip what appears to be a one-word border around the flags array.
 * The expansion reads a variable named `t1` from the enclosing scope.
 * Both arguments are parenthesized so that expressions such as
 * T1_FLAGS(i + 1, j + 4) index the intended word. */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
64
65
14.9G
/* Make `curctx` point at entry `ctxno` of the coder's context table.
 * Note: the expansion reads an identifier named `mqc` from the enclosing
 * scope; it is not a parameter of this macro.
 * The assignment target and the whole expansion are parenthesized so the
 * macro stays a single expression wherever it is used. */
#define opj_t1_setcurctx(curctx, ctxno)  ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)])
66
67
/* Macros to deal with signed integer with just MSB bit set for
 * negative values (smr = signed magnitude representation):
 * bit 31 carries the sign, bits 0..30 carry the magnitude. */

/* Magnitude of an smr value (sign bit masked off). */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
/* Sign of an smr value: 1 if the sign bit is set, 0 otherwise. */
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
/* Convert a two's complement integer to smr.
 * The negation is done in unsigned arithmetic on the parenthesized argument,
 * so an argument such as (a - b) is negated as a whole and the most negative
 * integer does not trigger signed overflow. `x` is evaluated more than once;
 * do not pass expressions with side effects. */
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((0U - (OPJ_UINT32)(x)) | 0x80000000U))
72
73
74
/** @name Local static functions */
75
/*@{*/
76
77
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
78
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
79
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
80
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
81
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
82
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
83
                                       OPJ_UINT32 vsc);
84
85
86
/**
87
Decode significant pass
88
*/
89
90
static INLINE void opj_t1_dec_sigpass_step_raw(
91
    opj_t1_t *t1,
92
    opj_flag_t *flagsp,
93
    OPJ_INT32 *datap,
94
    OPJ_INT32 oneplushalf,
95
    OPJ_UINT32 vsc,
96
    OPJ_UINT32 row);
97
static INLINE void opj_t1_dec_sigpass_step_mqc(
98
    opj_t1_t *t1,
99
    opj_flag_t *flagsp,
100
    OPJ_INT32 *datap,
101
    OPJ_INT32 oneplushalf,
102
    OPJ_UINT32 row,
103
    OPJ_UINT32 flags_stride,
104
    OPJ_UINT32 vsc);
105
106
/**
107
Encode significant pass
108
*/
109
static void opj_t1_enc_sigpass(opj_t1_t *t1,
110
                               OPJ_INT32 bpno,
111
                               OPJ_INT32 *nmsedec,
112
                               OPJ_BYTE type,
113
                               OPJ_UINT32 cblksty);
114
115
/**
116
Decode significant pass
117
*/
118
static void opj_t1_dec_sigpass_raw(
119
    opj_t1_t *t1,
120
    OPJ_INT32 bpno,
121
    OPJ_INT32 cblksty);
122
123
/**
124
Encode refinement pass
125
*/
126
static void opj_t1_enc_refpass(opj_t1_t *t1,
127
                               OPJ_INT32 bpno,
128
                               OPJ_INT32 *nmsedec,
129
                               OPJ_BYTE type);
130
131
/**
132
Decode refinement pass
133
*/
134
static void opj_t1_dec_refpass_raw(
135
    opj_t1_t *t1,
136
    OPJ_INT32 bpno);
137
138
139
/**
140
Decode refinement pass
141
*/
142
143
static INLINE void  opj_t1_dec_refpass_step_raw(
144
    opj_t1_t *t1,
145
    opj_flag_t *flagsp,
146
    OPJ_INT32 *datap,
147
    OPJ_INT32 poshalf,
148
    OPJ_UINT32 row);
149
static INLINE void opj_t1_dec_refpass_step_mqc(
150
    opj_t1_t *t1,
151
    opj_flag_t *flagsp,
152
    OPJ_INT32 *datap,
153
    OPJ_INT32 poshalf,
154
    OPJ_UINT32 row);
155
156
157
/**
158
Decode clean-up pass
159
*/
160
161
static void opj_t1_dec_clnpass_step(
162
    opj_t1_t *t1,
163
    opj_flag_t *flagsp,
164
    OPJ_INT32 *datap,
165
    OPJ_INT32 oneplushalf,
166
    OPJ_UINT32 row,
167
    OPJ_UINT32 vsc);
168
169
/**
170
Encode clean-up pass
171
*/
172
static void opj_t1_enc_clnpass(
173
    opj_t1_t *t1,
174
    OPJ_INT32 bpno,
175
    OPJ_INT32 *nmsedec,
176
    OPJ_UINT32 cblksty);
177
178
static OPJ_FLOAT64 opj_t1_getwmsedec(
179
    OPJ_INT32 nmsedec,
180
    OPJ_UINT32 compno,
181
    OPJ_UINT32 level,
182
    OPJ_UINT32 orient,
183
    OPJ_INT32 bpno,
184
    OPJ_UINT32 qmfbid,
185
    OPJ_FLOAT64 stepsize,
186
    OPJ_UINT32 numcomps,
187
    const OPJ_FLOAT64 * mct_norms,
188
    OPJ_UINT32 mct_numcomps);
189
190
/** Return "cumwmsedec" that should be used to increase tile->distotile */
191
static double opj_t1_encode_cblk(opj_t1_t *t1,
192
                                 opj_tcd_cblk_enc_t* cblk,
193
                                 OPJ_UINT32 orient,
194
                                 OPJ_UINT32 compno,
195
                                 OPJ_UINT32 level,
196
                                 OPJ_UINT32 qmfbid,
197
                                 OPJ_FLOAT64 stepsize,
198
                                 OPJ_UINT32 cblksty,
199
                                 OPJ_UINT32 numcomps,
200
                                 const OPJ_FLOAT64 * mct_norms,
201
                                 OPJ_UINT32 mct_numcomps);
202
203
/**
204
Decode 1 code-block
205
@param t1 T1 handle
206
@param cblk Code-block coding parameters
207
@param orient
208
@param roishift Region of interest shifting value
209
@param cblksty Code-block style
210
@param p_manager the event manager
211
@param p_manager_mutex mutex for the event manager
212
@param check_pterm whether PTERM correct termination should be checked
213
*/
214
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
215
                                   opj_tcd_cblk_dec_t* cblk,
216
                                   OPJ_UINT32 orient,
217
                                   OPJ_UINT32 roishift,
218
                                   OPJ_UINT32 cblksty,
219
                                   opj_event_mgr_t *p_manager,
220
                                   opj_mutex_t* p_manager_mutex,
221
                                   OPJ_BOOL check_pterm);
222
223
/**
224
Decode 1 HT code-block
225
@param t1 T1 handle
226
@param cblk Code-block coding parameters
227
@param orient
228
@param roishift Region of interest shifting value
229
@param cblksty Code-block style
230
@param p_manager the event manager
231
@param p_manager_mutex mutex for the event manager
232
@param check_pterm whether PTERM correct termination should be checked
233
*/
234
OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
235
                               opj_tcd_cblk_dec_t* cblk,
236
                               OPJ_UINT32 orient,
237
                               OPJ_UINT32 roishift,
238
                               OPJ_UINT32 cblksty,
239
                               opj_event_mgr_t *p_manager,
240
                               opj_mutex_t* p_manager_mutex,
241
                               OPJ_BOOL check_pterm);
242
243
244
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
245
                                        OPJ_UINT32 w,
246
                                        OPJ_UINT32 h);
247
248
/*@}*/
249
250
/*@}*/
251
252
/* ----------------------------------------------------------------------- */
253
254
/**
Look up the "zc" context number for a sample.
@param mqc coder whose lut_ctxno_zc_orient table is consulted
@param f   flag word, already shifted so that the sample of interest is in the
           lowest bit group; only the T1_SIGMA_NEIGHBOURS bits are used
@return the table entry selected by the neighbour bits of f
*/
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
{
    const OPJ_UINT32 neighbour_bits = f & T1_SIGMA_NEIGHBOURS;

    return mqc->lut_ctxno_zc_orient[neighbour_bits];
}
258
259
/**
Build the index that is fed to opj_t1_getctxno_sc() and opj_t1_getspb().
@param fX  flag word containing the sample
@param pfX flag word at flagsp[-1] (previous column)
@param nfX flag word at flagsp[1] (next column)
@param ci  position of the sample inside the flag word (bit group ci * 3)
@return index assembled from the bits listed in the table below
*/
static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
        OPJ_UINT32 pfX,
        OPJ_UINT32 nfX,
        OPJ_UINT32 ci)
{
    /*
      Bit layout of the returned index (bit, source word, source flag, meaning):
      0 pfX T1_CHI_THIS           T1_LUT_SGN_W
      1 tfX T1_SIGMA_1            T1_LUT_SIG_N
      2 nfX T1_CHI_THIS           T1_LUT_SGN_E
      3 tfX T1_SIGMA_3            T1_LUT_SIG_W
      4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
      5 tfX T1_SIGMA_5            T1_LUT_SIG_E
      6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
      7 tfX T1_SIGMA_7            T1_LUT_SIG_S
    */
    const OPJ_UINT32 group_shift = ci * 3U;
    OPJ_UINT32 upper_chi_shift;
    OPJ_UINT32 index;

    /* The first sample takes bit 4 from T1_CHI_0; the others take it from
       the chi bit of the sample just before them in the same word. */
    if (ci == 0U) {
        upper_chi_shift = T1_CHI_0_I - 4U;
    } else {
        upper_chi_shift = T1_CHI_1_I - 4U + ((ci - 1U) * 3U);
    }

    index = (fX >> group_shift) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
                                   T1_SIGMA_7);
    index |= (pfX >> (T1_CHI_THIS_I      + group_shift)) & (1U << 0);
    index |= (nfX >> (T1_CHI_THIS_I - 2U + group_shift)) & (1U << 2);
    index |= (fX >> upper_chi_shift) & (1U << 4);
    index |= (fX >> (T1_CHI_2_I - 6U + group_shift)) & (1U << 6);

    return index;
}
288
289
/**
Fetch the "sc" context number from lut_ctxno_sc.
@param lu index produced by opj_t1_getctxtno_sc_or_spb_index()
@return lut_ctxno_sc[lu]
*/
static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
{
    const OPJ_BYTE ctxno = lut_ctxno_sc[lu];

    return ctxno;
}
293
294
/**
Select the "mag" context number for a sample.
@param f flag word, already shifted so that the sample of interest is in the
         lowest bit group
@return T1_CTXNO_MAG + 2 when T1_MU_0 is set in f; otherwise
        T1_CTXNO_MAG + 1 when any T1_SIGMA_NEIGHBOURS bit is set;
        otherwise T1_CTXNO_MAG
*/
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
{
    /* T1_MU_0 takes precedence over the neighbour test. */
    if (f & T1_MU_0) {
        return T1_CTXNO_MAG + 2;
    }
    if (f & T1_SIGMA_NEIGHBOURS) {
        return T1_CTXNO_MAG + 1;
    }
    return T1_CTXNO_MAG;
}
300
301
/**
Fetch the "spb" value from lut_spb; callers XOR it with the sign bit.
@param lu index produced by opj_t1_getctxtno_sc_or_spb_index()
@return lut_spb[lu]
*/
static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
{
    const OPJ_BYTE spb = lut_spb[lu];

    return spb;
}
305
306
/**
Table lookup used by the encoder's significance pass to accumulate nmsedec.
@param x      magnitude of the sample
@param bitpos bit-plane number; for values > 0, x is shifted right by it first
@return entry of lut_nmsedec_sig (bitpos > 0) or lut_nmsedec_sig0 (bitpos == 0),
        indexed by the low T1_NMSEDEC_BITS bits
*/
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
    const OPJ_UINT32 index_mask = (OPJ_UINT32)((1 << T1_NMSEDEC_BITS) - 1);

    return (bitpos > 0)
           ? lut_nmsedec_sig[(x >> bitpos) & index_mask]
           : lut_nmsedec_sig0[x & index_mask];
}
314
315
/**
Table lookup used by the encoder's refinement pass to accumulate nmsedec.
@param x      magnitude of the sample
@param bitpos bit-plane number; for values > 0, x is shifted right by it first
@return entry of lut_nmsedec_ref (bitpos > 0) or lut_nmsedec_ref0 (bitpos == 0),
        indexed by the low T1_NMSEDEC_BITS bits
*/
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
    const OPJ_UINT32 index_mask = (OPJ_UINT32)((1 << T1_NMSEDEC_BITS) - 1);

    return (bitpos > 0)
           ? lut_nmsedec_ref[(x >> bitpos) & index_mask]
           : lut_nmsedec_ref0[x & index_mask];
}
323
324
1.04G
/**
Record that sample `ci` of the flag word has become significant with sign `s`,
and propagate that fact to the neighbouring flag words.

@param flags  lvalue holding the sample's flag word (a local copy or *flagsp)
@param flagsp pointer to that flag word inside the flags array; the previous
              and next words, and the words one `stride` before/after, are
              updated through it
@param ci     position of the sample inside the flag word (bit group ci * 3)
@param s      sign bit of the sample (0 or 1)
@param stride distance, in flag words, between vertically adjacent words
@param vsc    when non-zero, the words above the first sample are left alone

Every argument is parenthesized in the expansion, so expressions such as
`j - k` may be passed for ci. Arguments are evaluated more than once.
*/
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* previous column: its east neighbour is now significant */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant and store its sign */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* next column: its west neighbour is now significant */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* first sample of the word: update the three words above, unless vsc */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* last sample of the word: update the three words below */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
351
352
353
/**
Function form of opj_t1_update_flags_macro operating directly on *flagsp.
@param flagsp pointer to the flag word of the sample that became significant
@param ci     position of the sample inside the flag word
@param s      sign bit of the sample
@param stride distance, in flag words, between vertically adjacent words
@param vsc    non-zero to skip the update of the words above the first sample
*/
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
                                       OPJ_UINT32 vsc)
{
    opj_flag_t *const word = flagsp;

    opj_t1_update_flags_macro(*word, word, ci, s, stride, vsc);
}
359
360
/**
361
Encode significant pass
362
*/
363
7.89G
/**
Encode one sample in the significance pass.

The sample is coded only when neither its T1_SIGMA_THIS nor its T1_PI_THIS bit
is set and at least one T1_SIGMA_NEIGHBOURS bit is set. The bit of the
magnitude selected by `one` is coded first; if it is 1, the sign is coded too,
*nmsedec is increased and the flags are updated. T1_PI_THIS is set afterwards
in either case.

Besides its parameters, the expansion uses a variable named `t1` from the
enclosing scope (for the flags stride t1->w + 2).

@param mqc, curctx, a, c, ct  coder and its state variables, forwarded to the
                              opj_mqc_*_macro helpers
@param flagspIn pointer to the flag word of the sample
@param datapIn  pointer to the sample, stored in smr form
@param bpno     bit-plane number handed to opj_t1_getnmsedec_sig
@param one      mask of the magnitude bit to code
@param nmsedec  pointer to the accumulator to increase
@param type     T1_TYPE_RAW selects the bypass coder, anything else the MQ coder
@param ciIn     position of the sample inside the flag word
@param vscIn    forwarded to opj_t1_update_flags as vsc
*/
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
{ \
    const OPJ_UINT32 ci = (ciIn); \
    const OPJ_UINT32 vsc = (vscIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    opj_flag_t* flagsp = (flagspIn); \
    const OPJ_UINT32 flags = *flagsp; \
    const OPJ_UINT32 l_shift = ci * 3U; \
    OPJ_UINT32 v; \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << l_shift)) == 0U && \
            (flags & (T1_SIGMA_NEIGHBOURS << l_shift)) != 0U) { \
        const OPJ_UINT32 l_zc_ctx = opj_t1_getctxno_zc(mqc, flags >> l_shift); \
        v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
        opj_t1_setcurctx(curctx, l_zc_ctx); \
        if (type != T1_TYPE_RAW) { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } else {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } \
        if (v) { \
            /* The sample just became significant: code its sign. */ \
            const OPJ_UINT32 l_lut_idx = opj_t1_getctxtno_sc_or_spb_index( \
                                             *flagsp, \
                                             flagsp[-1], flagsp[1], \
                                             ci); \
            const OPJ_UINT32 l_sc_ctx = opj_t1_getctxno_sc(l_lut_idx); \
            v = opj_smr_sign(*l_datap); \
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                              (OPJ_UINT32)bpno); \
            opj_t1_setcurctx(curctx, l_sc_ctx); \
            if (type != T1_TYPE_RAW) { \
                /* MQ mode codes the sign XORed with the spb lookup. */ \
                const OPJ_UINT32 l_spb = opj_t1_getspb(l_lut_idx); \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ l_spb); \
            } else {  /* BYPASS/LAZY MODE */ \
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
            } \
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
        } \
        *flagsp |= T1_PI_THIS << l_shift; \
    } \
}
411
412
static INLINE void opj_t1_dec_sigpass_step_raw(
413
    opj_t1_t *t1,
414
    opj_flag_t *flagsp,
415
    OPJ_INT32 *datap,
416
    OPJ_INT32 oneplushalf,
417
    OPJ_UINT32 vsc,
418
    OPJ_UINT32 ci)
419
46.8M
{
420
46.8M
    OPJ_UINT32 v;
421
46.8M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
422
423
46.8M
    OPJ_UINT32 const flags = *flagsp;
424
425
46.8M
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
426
4.57M
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
427
3.84M
        if (opj_mqc_raw_decode(mqc)) {
428
3.48M
            v = opj_mqc_raw_decode(mqc);
429
3.48M
            *datap = v ? -oneplushalf : oneplushalf;
430
3.48M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
431
3.48M
        }
432
3.84M
        *flagsp |= T1_PI_THIS << (ci * 3U);
433
3.84M
    }
434
46.8M
}
435
436
/**
Decode one sample of the significance pass with the MQ coder.

The sample is decoded only when neither its T1_SIGMA_THIS nor its T1_PI_THIS
bit is set and at least one T1_SIGMA_NEIGHBOURS bit is set. When the decoded
bit is 1, a sign bit is decoded, XORed with the spb lookup, the sample is set
to +/- oneplushalf and the flags are updated. T1_PI_THIS is set in either case.

@param flags        lvalue holding the flag word (a local copy or *flagsp)
@param flagsp       pointer to the flag word inside the flags array
@param flags_stride distance, in flag words, between vertically adjacent words
@param data         pointer to the first sample of the column
@param data_stride  distance between vertically adjacent samples
@param ci           position of the sample inside the flag word
@param mqc, curctx, a, c, ct  coder and its state variables
@param v            scratch lvalue receiving the decoded bits
@param oneplushalf  magnitude stored for a newly significant sample
@param vsc          forwarded to opj_t1_update_flags_macro

Arguments are parenthesized in the expansion so that compound expressions can
be passed safely; they are evaluated more than once.
*/
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                flags, \
                                (flagsp)[-1], (flagsp)[1], \
                                ci); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            (v) = (v) ^ spb; \
            (data)[(ci) * (data_stride)] = (v) ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
461
462
static INLINE void opj_t1_dec_sigpass_step_mqc(
463
    opj_t1_t *t1,
464
    opj_flag_t *flagsp,
465
    OPJ_INT32 *datap,
466
    OPJ_INT32 oneplushalf,
467
    OPJ_UINT32 ci,
468
    OPJ_UINT32 flags_stride,
469
    OPJ_UINT32 vsc)
470
30.8M
{
471
30.8M
    OPJ_UINT32 v;
472
473
30.8M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
474
30.8M
    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
475
30.8M
                                      0, ci, mqc, mqc->curctx,
476
30.8M
                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
477
30.8M
}
478
479
static void opj_t1_enc_sigpass(opj_t1_t *t1,
480
                               OPJ_INT32 bpno,
481
                               OPJ_INT32 *nmsedec,
482
                               OPJ_BYTE type,
483
                               OPJ_UINT32 cblksty
484
                              )
485
8.62M
{
486
8.62M
    OPJ_UINT32 i, k;
487
8.62M
    OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
488
8.62M
    opj_flag_t* f = &T1_FLAGS(0, 0);
489
8.62M
    OPJ_UINT32 const extra = 2;
490
8.62M
    opj_mqc_t* mqc = &(t1->mqc);
491
8.62M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
492
8.62M
    const OPJ_INT32* datap = t1->data;
493
494
8.62M
    *nmsedec = 0;
495
#ifdef DEBUG_ENC_SIG
496
    fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
497
#endif
498
96.4M
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
499
87.8M
        const OPJ_UINT32 w = t1->w;
500
#ifdef DEBUG_ENC_SIG
501
        fprintf(stderr, " k=%d\n", k);
502
#endif
503
4.30G
        for (i = 0; i < w; ++i, ++f, datap += 4) {
504
#ifdef DEBUG_ENC_SIG
505
            fprintf(stderr, " i=%d\n", i);
506
#endif
507
4.21G
            if (*f == 0U) {
508
                /* Nothing to do for any of the 4 data points */
509
2.25G
                continue;
510
2.25G
            }
511
1.95G
            opj_t1_enc_sigpass_step_macro(
512
1.95G
                mqc, curctx, a, c, ct,
513
1.95G
                f,
514
1.95G
                &datap[0],
515
1.95G
                bpno,
516
1.95G
                one,
517
1.95G
                nmsedec,
518
1.95G
                type,
519
1.95G
                0, cblksty & J2K_CCP_CBLKSTY_VSC);
520
1.95G
            opj_t1_enc_sigpass_step_macro(
521
1.95G
                mqc, curctx, a, c, ct,
522
1.95G
                f,
523
1.95G
                &datap[1],
524
1.95G
                bpno,
525
1.95G
                one,
526
1.95G
                nmsedec,
527
1.95G
                type,
528
1.95G
                1, 0);
529
1.95G
            opj_t1_enc_sigpass_step_macro(
530
1.95G
                mqc, curctx, a, c, ct,
531
1.95G
                f,
532
1.95G
                &datap[2],
533
1.95G
                bpno,
534
1.95G
                one,
535
1.95G
                nmsedec,
536
1.95G
                type,
537
1.95G
                2, 0);
538
1.95G
            opj_t1_enc_sigpass_step_macro(
539
1.95G
                mqc, curctx, a, c, ct,
540
1.95G
                f,
541
1.95G
                &datap[3],
542
1.95G
                bpno,
543
1.95G
                one,
544
1.95G
                nmsedec,
545
1.95G
                type,
546
1.95G
                3, 0);
547
1.95G
        }
548
87.8M
    }
549
550
8.62M
    if (k < t1->h) {
551
891k
        OPJ_UINT32 j;
552
#ifdef DEBUG_ENC_SIG
553
        fprintf(stderr, " k=%d\n", k);
554
#endif
555
31.7M
        for (i = 0; i < t1->w; ++i, ++f) {
556
#ifdef DEBUG_ENC_SIG
557
            fprintf(stderr, " i=%d\n", i);
558
#endif
559
30.8M
            if (*f == 0U) {
560
                /* Nothing to do for any of the 4 data points */
561
0
                datap += (t1->h - k);
562
0
                continue;
563
0
            }
564
93.6M
            for (j = k; j < t1->h; ++j, ++datap) {
565
62.8M
                opj_t1_enc_sigpass_step_macro(
566
62.8M
                    mqc, curctx, a, c, ct,
567
62.8M
                    f,
568
62.8M
                    &datap[0],
569
62.8M
                    bpno,
570
62.8M
                    one,
571
62.8M
                    nmsedec,
572
62.8M
                    type,
573
62.8M
                    j - k,
574
62.8M
                    (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
575
62.8M
            }
576
30.8M
        }
577
891k
    }
578
579
8.62M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
580
8.62M
}
581
582
static void opj_t1_dec_sigpass_raw(
583
    opj_t1_t *t1,
584
    OPJ_INT32 bpno,
585
    OPJ_INT32 cblksty)
586
31.8k
{
587
31.8k
    OPJ_INT32 one, half, oneplushalf;
588
31.8k
    OPJ_UINT32 i, j, k;
589
31.8k
    OPJ_INT32 *data = t1->data;
590
31.8k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
591
31.8k
    const OPJ_UINT32 l_w = t1->w;
592
31.8k
    one = 1 << bpno;
593
31.8k
    half = one >> 1;
594
31.8k
    oneplushalf = one | half;
595
596
317k
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
597
14.5M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
598
14.2M
            opj_flag_t flags = *flagsp;
599
14.2M
            if (flags != 0) {
600
10.7M
                opj_t1_dec_sigpass_step_raw(
601
10.7M
                    t1,
602
10.7M
                    flagsp,
603
10.7M
                    data,
604
10.7M
                    oneplushalf,
605
10.7M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
606
10.7M
                    0U);
607
10.7M
                opj_t1_dec_sigpass_step_raw(
608
10.7M
                    t1,
609
10.7M
                    flagsp,
610
10.7M
                    data + l_w,
611
10.7M
                    oneplushalf,
612
10.7M
                    OPJ_FALSE, /* vsc */
613
10.7M
                    1U);
614
10.7M
                opj_t1_dec_sigpass_step_raw(
615
10.7M
                    t1,
616
10.7M
                    flagsp,
617
10.7M
                    data + 2 * l_w,
618
10.7M
                    oneplushalf,
619
10.7M
                    OPJ_FALSE, /* vsc */
620
10.7M
                    2U);
621
10.7M
                opj_t1_dec_sigpass_step_raw(
622
10.7M
                    t1,
623
10.7M
                    flagsp,
624
10.7M
                    data + 3 * l_w,
625
10.7M
                    oneplushalf,
626
10.7M
                    OPJ_FALSE, /* vsc */
627
10.7M
                    3U);
628
10.7M
            }
629
14.2M
        }
630
286k
    }
631
31.8k
    if (k < t1->h) {
632
2.19M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
633
6.18M
            for (j = 0; j < t1->h - k; ++j) {
634
4.00M
                opj_t1_dec_sigpass_step_raw(
635
4.00M
                    t1,
636
4.00M
                    flagsp,
637
4.00M
                    data + j * l_w,
638
4.00M
                    oneplushalf,
639
4.00M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
640
4.00M
                    j);
641
4.00M
            }
642
2.17M
        }
643
17.5k
    }
644
31.8k
}
645
646
1.82M
/**
Body shared by the opj_t1_dec_sigpass_mqc_* functions: decode the significance
pass of one bit-plane with the MQ coder.

Complete stripes of 4 rows are decoded with the step macro on local copies of
the coder state and of each flag word; the coder state is written back before
the leftover rows (h not a multiple of 4) are decoded through
opj_t1_dec_sigpass_step_mqc().

@param t1           T1 handle
@param bpno         bit-plane number
@param vsc          non-zero for the vertically-causal variant
@param w            code-block width (also the data row stride)
@param h            code-block height
@param flags_stride distance, in flag words, between vertically adjacent words
*/
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        const OPJ_INT32 one = 1 << (bpno); \
        const OPJ_INT32 half = one >> 1; \
        const OPJ_INT32 oneplushalf = one | half; \
        OPJ_UINT32 i, j, k; \
        OPJ_INT32 *data = (t1)->data; \
        opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        OPJ_UINT32 v; \
        /* complete stripes of 4 rows */ \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags == 0 ) { \
                            /* nothing to decode for these 4 samples */ \
                            continue; \
                        } \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        /* store the locally updated flag word */ \
                        *flagsp = flags; \
                } \
        } \
        /* the step function below works on the coder state in t1->mqc */ \
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        /* remaining 1 to 3 rows, if any */ \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
            } \
        } \
}
689
690
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
691
    opj_t1_t *t1,
692
    OPJ_INT32 bpno)
693
329k
{
694
329k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
695
329k
}
696
697
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
698
    opj_t1_t *t1,
699
    OPJ_INT32 bpno)
700
337k
{
701
337k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
702
337k
}
703
704
static void opj_t1_dec_sigpass_mqc_generic_novsc(
705
    opj_t1_t *t1,
706
    OPJ_INT32 bpno)
707
610k
{
708
610k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
709
610k
                                    t1->w + 2U);
710
610k
}
711
712
static void opj_t1_dec_sigpass_mqc_generic_vsc(
713
    opj_t1_t *t1,
714
    OPJ_INT32 bpno)
715
548k
{
716
548k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
717
548k
                                    t1->w + 2U);
718
548k
}
719
720
static void opj_t1_dec_sigpass_mqc(
721
    opj_t1_t *t1,
722
    OPJ_INT32 bpno,
723
    OPJ_INT32 cblksty)
724
1.82M
{
725
1.82M
    if (t1->w == 64 && t1->h == 64) {
726
667k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
727
337k
            opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
728
337k
        } else {
729
329k
            opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
730
329k
        }
731
1.15M
    } else {
732
1.15M
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
733
548k
            opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
734
610k
        } else {
735
610k
            opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
736
610k
        }
737
1.15M
    }
738
1.82M
}
739
740
/**
741
Encode refinement pass step
742
*/
743
6.14G
/**
Encode one sample in the refinement pass.

The sample is coded only when its T1_SIGMA_THIS bit is set and its T1_PI_THIS
bit is clear. The magnitude bit selected by `one` is coded, *nmsedec is
increased, and T1_MU_THIS is set in flagsUpdated.

@param mqc, curctx, a, c, ct  coder and its state variables, forwarded to the
                              opj_mqc_*_macro helpers
@param flags        flag word of the sample (read only)
@param flagsUpdated lvalue that receives the T1_MU_THIS bit
@param datap        pointer to the sample, stored in smr form
@param bpno         bit-plane number handed to opj_t1_getnmsedec_ref
@param one          mask of the magnitude bit to code
@param nmsedec      pointer to the accumulator to increase
@param type         T1_TYPE_RAW selects the bypass coder, anything else the MQ coder
@param ci           position of the sample inside the flag word
*/
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
{\
    const OPJ_UINT32 l_shift = (ci) * 3U; \
    OPJ_UINT32 v; \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << l_shift)) == (T1_SIGMA_THIS << l_shift)) { \
        const OPJ_UINT32 l_mag_ctx = opj_t1_getctxno_mag(flags >> l_shift); \
        const OPJ_UINT32 l_magnitude = opj_smr_abs(*datap); \
        *nmsedec += opj_t1_getnmsedec_ref(l_magnitude, \
                                          (OPJ_UINT32)bpno); \
        v = ((OPJ_INT32)l_magnitude & one) ? 1 : 0; \
        opj_t1_setcurctx(curctx, l_mag_ctx); \
        if (type != T1_TYPE_RAW) { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } else {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } \
        flagsUpdated |= T1_MU_THIS << l_shift; \
    } \
}
765
766
767
static INLINE void opj_t1_dec_refpass_step_raw(
768
    opj_t1_t *t1,
769
    opj_flag_t *flagsp,
770
    OPJ_INT32 *datap,
771
    OPJ_INT32 poshalf,
772
    OPJ_UINT32 ci)
773
45.7M
{
774
45.7M
    OPJ_UINT32 v;
775
776
45.7M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
777
778
45.7M
    if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
779
45.7M
            (T1_SIGMA_THIS << (ci * 3U))) {
780
41.0M
        v = opj_mqc_raw_decode(mqc);
781
41.0M
        *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
782
41.0M
        *flagsp |= T1_MU_THIS << (ci * 3U);
783
41.0M
    }
784
45.7M
}
785
786
/**
Decode one sample of the refinement pass with the MQ coder.

Only samples whose T1_SIGMA_THIS bit is set and whose T1_PI_THIS bit is clear
are refined: one bit is decoded in the context given by opj_t1_getctxno_mag(),
poshalf is added to or subtracted from the sample, and T1_MU_THIS is set.

@param flags       lvalue holding the flag word (a local copy or *flagsp)
@param data        pointer to the first sample of the column
@param data_stride distance between vertically adjacent samples
@param ci          position of the sample inside the flag word
@param mqc, curctx, a, c, ct  coder and its state variables
@param v           scratch lvalue receiving the decoded bit
@param poshalf     refinement step

Arguments are parenthesized in the expansion so that compound expressions can
be passed safely; they are evaluated more than once.
*/
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        (data)[(ci) * (data_stride)] += \
            ((v) ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
798
799
static INLINE void opj_t1_dec_refpass_step_mqc(
800
    opj_t1_t *t1,
801
    opj_flag_t *flagsp,
802
    OPJ_INT32 *datap,
803
    OPJ_INT32 poshalf,
804
    OPJ_UINT32 ci)
805
29.0M
{
806
29.0M
    OPJ_UINT32 v;
807
808
29.0M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
809
29.0M
    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
810
29.0M
                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
811
29.0M
                                      mqc->ct, poshalf);
812
29.0M
}
813
814
static void opj_t1_enc_refpass(
815
    opj_t1_t *t1,
816
    OPJ_INT32 bpno,
817
    OPJ_INT32 *nmsedec,
818
    OPJ_BYTE type)
819
8.62M
{
820
8.62M
    OPJ_UINT32 i, k;
821
8.62M
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
822
8.62M
    opj_flag_t* f = &T1_FLAGS(0, 0);
823
8.62M
    const OPJ_UINT32 extra = 2U;
824
8.62M
    opj_mqc_t* mqc = &(t1->mqc);
825
8.62M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
826
8.62M
    const OPJ_INT32* datap = t1->data;
827
828
8.62M
    *nmsedec = 0;
829
#ifdef DEBUG_ENC_REF
830
    fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
831
#endif
832
96.4M
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
833
#ifdef DEBUG_ENC_REF
834
        fprintf(stderr, " k=%d\n", k);
835
#endif
836
4.30G
        for (i = 0; i < t1->w; ++i, f++, datap += 4) {
837
4.21G
            const OPJ_UINT32 flags = *f;
838
4.21G
            OPJ_UINT32 flagsUpdated = flags;
839
#ifdef DEBUG_ENC_REF
840
            fprintf(stderr, " i=%d\n", i);
841
#endif
842
4.21G
            if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
843
                /* none significant */
844
2.63G
                continue;
845
2.63G
            }
846
1.58G
            if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
847
1.58G
                    (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
848
                /* all processed by sigpass */
849
54.0M
                continue;
850
54.0M
            }
851
852
1.53G
            opj_t1_enc_refpass_step_macro(
853
1.53G
                mqc, curctx, a, c, ct,
854
1.53G
                flags, flagsUpdated,
855
1.53G
                &datap[0],
856
1.53G
                bpno,
857
1.53G
                one,
858
1.53G
                nmsedec,
859
1.53G
                type,
860
1.53G
                0);
861
1.53G
            opj_t1_enc_refpass_step_macro(
862
1.53G
                mqc, curctx, a, c, ct,
863
1.53G
                flags, flagsUpdated,
864
1.53G
                &datap[1],
865
1.53G
                bpno,
866
1.53G
                one,
867
1.53G
                nmsedec,
868
1.53G
                type,
869
1.53G
                1);
870
1.53G
            opj_t1_enc_refpass_step_macro(
871
1.53G
                mqc, curctx, a, c, ct,
872
1.53G
                flags, flagsUpdated,
873
1.53G
                &datap[2],
874
1.53G
                bpno,
875
1.53G
                one,
876
1.53G
                nmsedec,
877
1.53G
                type,
878
1.53G
                2);
879
1.53G
            opj_t1_enc_refpass_step_macro(
880
1.53G
                mqc, curctx, a, c, ct,
881
1.53G
                flags, flagsUpdated,
882
1.53G
                &datap[3],
883
1.53G
                bpno,
884
1.53G
                one,
885
1.53G
                nmsedec,
886
1.53G
                type,
887
1.53G
                3);
888
1.53G
            *f = flagsUpdated;
889
1.53G
        }
890
87.8M
    }
891
892
8.62M
    if (k < t1->h) {
893
891k
        OPJ_UINT32 j;
894
891k
        const OPJ_UINT32 remaining_lines = t1->h - k;
895
#ifdef DEBUG_ENC_REF
896
        fprintf(stderr, " k=%d\n", k);
897
#endif
898
31.7M
        for (i = 0; i < t1->w; ++i, ++f) {
899
#ifdef DEBUG_ENC_REF
900
            fprintf(stderr, " i=%d\n", i);
901
#endif
902
30.8M
            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
903
                /* none significant */
904
19.0M
                datap += remaining_lines;
905
19.0M
                continue;
906
19.0M
            }
907
36.0M
            for (j = 0; j < remaining_lines; ++j, datap ++) {
908
24.2M
                opj_t1_enc_refpass_step_macro(
909
24.2M
                    mqc, curctx, a, c, ct,
910
24.2M
                    *f, *f,
911
24.2M
                    &datap[0],
912
24.2M
                    bpno,
913
24.2M
                    one,
914
24.2M
                    nmsedec,
915
24.2M
                    type,
916
24.2M
                    j);
917
24.2M
            }
918
11.8M
        }
919
891k
    }
920
921
8.62M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
922
8.62M
}
923
924
925
static void opj_t1_dec_refpass_raw(
926
    opj_t1_t *t1,
927
    OPJ_INT32 bpno)
928
30.7k
{
929
30.7k
    OPJ_INT32 one, poshalf;
930
30.7k
    OPJ_UINT32 i, j, k;
931
30.7k
    OPJ_INT32 *data = t1->data;
932
30.7k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
933
30.7k
    const OPJ_UINT32 l_w = t1->w;
934
30.7k
    one = 1 << bpno;
935
30.7k
    poshalf = one >> 1;
936
310k
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
937
14.2M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
938
13.9M
            opj_flag_t flags = *flagsp;
939
13.9M
            if (flags != 0) {
940
10.4M
                opj_t1_dec_refpass_step_raw(
941
10.4M
                    t1,
942
10.4M
                    flagsp,
943
10.4M
                    data,
944
10.4M
                    poshalf,
945
10.4M
                    0U);
946
10.4M
                opj_t1_dec_refpass_step_raw(
947
10.4M
                    t1,
948
10.4M
                    flagsp,
949
10.4M
                    data + l_w,
950
10.4M
                    poshalf,
951
10.4M
                    1U);
952
10.4M
                opj_t1_dec_refpass_step_raw(
953
10.4M
                    t1,
954
10.4M
                    flagsp,
955
10.4M
                    data + 2 * l_w,
956
10.4M
                    poshalf,
957
10.4M
                    2U);
958
10.4M
                opj_t1_dec_refpass_step_raw(
959
10.4M
                    t1,
960
10.4M
                    flagsp,
961
10.4M
                    data + 3 * l_w,
962
10.4M
                    poshalf,
963
10.4M
                    3U);
964
10.4M
            }
965
13.9M
        }
966
279k
    }
967
30.7k
    if (k < t1->h) {
968
2.09M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
969
5.92M
            for (j = 0; j < t1->h - k; ++j) {
970
3.84M
                opj_t1_dec_refpass_step_raw(
971
3.84M
                    t1,
972
3.84M
                    flagsp,
973
3.84M
                    data + j * l_w,
974
3.84M
                    poshalf,
975
3.84M
                    j);
976
3.84M
            }
977
2.08M
        }
978
16.7k
    }
979
30.7k
}
980
981
1.71M
/*
 * Body of the MQ-coded magnitude-refinement decoding pass, parameterized on
 * the block geometry so that callers can pass compile-time constants.
 *
 *  t1           Tier-1 handle (data, flags and mqc members are used)
 *  bpno         bit-plane number; the refinement increment is (1 << bpno) >> 1
 *  w, h         code-block width and height
 *  flags_stride number of flag entries per flag row; the walk starts at
 *               t1->flags[flags_stride + 1]
 *
 * Complete stripes of four rows are decoded with the step macro on a local
 * copy of the flag word, which is written back once per column. Columns
 * whose flag word is 0 are skipped. The coder variables set up by
 * DOWNLOAD_MQC_VARIABLES are written back with UPLOAD_MQC_VARIABLES before
 * the leftover rows (h not a multiple of 4) are handled, because those go
 * through opj_t1_dec_refpass_step_mqc(), which works on t1->mqc directly.
 *
 * The macro declares locals (one, poshalf, i, j, k, data, flagsp, l_w, mqc,
 * v, ...) and therefore has to be the whole body of the calling function.
 */
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
982
1.71M
{ \
983
1.71M
        OPJ_INT32 one, poshalf; \
984
1.71M
        OPJ_UINT32 i, j, k; \
985
1.71M
        register OPJ_INT32 *data = t1->data; \
986
1.71M
        register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
987
1.71M
        const OPJ_UINT32 l_w = w; \
988
1.71M
        opj_mqc_t* mqc = &(t1->mqc); \
989
1.71M
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
990
1.71M
        register OPJ_UINT32 v; \
991
1.71M
        one = 1 << bpno; \
992
1.71M
        poshalf = one >> 1; \
993
21.1M
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
994
767M
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
995
748M
                        opj_flag_t flags = *flagsp; \
996
748M
                        if( flags != 0 ) { \
997
459M
                            opj_t1_dec_refpass_step_mqc_macro( \
998
459M
                                flags, data, l_w, 0, \
999
459M
                                mqc, curctx, v, a, c, ct, poshalf); \
1000
459M
                            opj_t1_dec_refpass_step_mqc_macro( \
1001
459M
                                flags, data, l_w, 1, \
1002
459M
                                mqc, curctx, v, a, c, ct, poshalf); \
1003
459M
                            opj_t1_dec_refpass_step_mqc_macro( \
1004
459M
                                flags, data, l_w, 2, \
1005
459M
                                mqc, curctx, v, a, c, ct, poshalf); \
1006
459M
                            opj_t1_dec_refpass_step_mqc_macro( \
1007
459M
                                flags, data, l_w, 3, \
1008
459M
                                mqc, curctx, v, a, c, ct, poshalf); \
1009
459M
                            *flagsp = flags; \
1010
459M
                        } \
1011
748M
                } \
1012
19.4M
        } \
1013
1.71M
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1014
1.71M
        if( k < h ) { \
1015
21.5M
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1016
50.0M
                for (j = 0; j < h - k; ++j) { \
1017
29.0M
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1018
29.0M
                } \
1019
20.9M
            } \
1020
592k
        } \
1021
1.71M
}
1022
1023
static void opj_t1_dec_refpass_mqc_64x64(
1024
    opj_t1_t *t1,
1025
    OPJ_INT32 bpno)
1026
622k
{
1027
622k
    opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1028
622k
}
1029
1030
static void opj_t1_dec_refpass_mqc_generic(
1031
    opj_t1_t *t1,
1032
    OPJ_INT32 bpno)
1033
1.09M
{
1034
1.09M
    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1035
1.09M
}
1036
1037
static void opj_t1_dec_refpass_mqc(
1038
    opj_t1_t *t1,
1039
    OPJ_INT32 bpno)
1040
1.71M
{
1041
1.71M
    if (t1->w == 64 && t1->h == 64) {
1042
622k
        opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1043
1.09M
    } else {
1044
1.09M
        opj_t1_dec_refpass_mqc_generic(t1, bpno);
1045
1.09M
    }
1046
1.71M
}
1047
1048
/**
 * Encode clean-up pass step for one column of a stripe.
 *
 * flagspIn  pointer to the column's flag word (updated in place)
 * datapIn   pointer to the sample of row `runlen`; following rows are read
 *           from consecutive addresses
 * agg       non-zero when the column was entered through run-length coding;
 *           row `runlen` then goes straight to sign coding
 * runlen    first row to process
 * lim       one past the last row to process
 * cblksty   code-block style; J2K_CCP_CBLKSTY_VSC is honoured for row 0 only
 *
 * If all four significance bits (T1_SIGMA_4/7/10/13) and all four T1_PI bits
 * are set, nothing is coded and only the T1_PI bits of rows >= runlen are
 * cleared. Otherwise, for each row: unless forced by `agg`, a row with
 * neither T1_SIGMA_THIS nor T1_PI_THIS set gets a zero-coding decision
 * (bit `one` of the sample magnitude); a row that becomes significant adds to
 * *nmsedec, has its sign coded (XORed with the predicted sign bit) and
 * updates the flags through opj_t1_update_flags(). The row's T1_PI bit is
 * cleared in every case.
 *
 * Besides its parameters the macro reads `t1` from the enclosing scope
 * (t1->w + 2U is the flag stride). It declares locals named v, ci, flagsp,
 * l_datap and check, so arguments must not be expressions using those names.
 */
1051
2.13G
#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
1052
2.13G
{ \
1053
2.13G
    OPJ_UINT32 v; \
1054
2.13G
    OPJ_UINT32 ci; \
1055
2.13G
    opj_flag_t* const flagsp = (flagspIn); \
1056
2.13G
    const OPJ_INT32* l_datap = (datapIn); \
1057
2.13G
    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
1058
2.13G
                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1059
2.13G
 \
1060
2.13G
    if ((*flagsp & check) == check) { \
1061
4.06M
        if (runlen == 0) { \
1062
4.06M
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1063
4.06M
        } else if (runlen == 1) { \
1064
0
            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
1065
0
        } else if (runlen == 2) { \
1066
0
            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
1067
0
        } else if (runlen == 3) { \
1068
0
            *flagsp &= ~(T1_PI_3); \
1069
0
        } \
1070
4.06M
    } \
1071
2.13G
    else \
1072
10.5G
    for (ci = runlen; ci < lim; ++ci) { \
1073
8.42G
        OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
1074
8.42G
        if ((agg != 0) && (ci == runlen)) { \
1075
27.8M
            goto_PARTIAL = OPJ_TRUE; \
1076
27.8M
        } \
1077
8.42G
        else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
1078
1.57G
            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
1079
1.57G
/* #ifdef DEBUG_ENC_CLN */ \
1080
1.57G
/*            printf("   ctxt1=%d\n", ctxt1); */ \
1081
1.57G
/* #endif */ \
1082
1.57G
            opj_t1_setcurctx(curctx, ctxt1); \
1083
1.57G
            v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
1084
1.57G
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
1085
1.57G
            if (v) { \
1086
98.3M
                goto_PARTIAL = OPJ_TRUE; \
1087
98.3M
            } \
1088
1.57G
        } \
1089
8.42G
        if( goto_PARTIAL ) { \
1090
126M
            OPJ_UINT32 vsc; \
1091
126M
            OPJ_UINT32 ctxt2, spb; \
1092
126M
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1093
126M
                        *flagsp, \
1094
126M
                        flagsp[-1], flagsp[1], \
1095
126M
                        ci); \
1096
126M
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
1097
126M
                                                (OPJ_UINT32)bpno); \
1098
126M
            ctxt2 = opj_t1_getctxno_sc(lu); \
1099
126M
/* #ifdef DEBUG_ENC_CLN */ \
1100
126M
/*           printf("   ctxt2=%d\n", ctxt2); */ \
1101
126M
/* #endif */ \
1102
126M
            opj_t1_setcurctx(curctx, ctxt2); \
1103
126M
 \
1104
126M
            v = opj_smr_sign(*l_datap); \
1105
126M
            spb = opj_t1_getspb(lu); \
1106
126M
/* #ifdef DEBUG_ENC_CLN */ \
1107
126M
/*           printf("   spb=%d\n", spb); */\
1108
126M
/* #endif */ \
1109
126M
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
1110
126M
            vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
1111
126M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
1112
126M
        } \
1113
8.42G
        *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
1114
8.42G
        l_datap ++; \
1115
8.42G
    } \
1116
2.13G
}
1117
1118
/*
 * Decode clean-up pass step for row `ci` of a column.
 *
 *  check_flags  when true, the row is skipped if T1_SIGMA_THIS or T1_PI_THIS
 *               is already set for it in `flags`
 *  partial      when true, the zero-coding decision is not decoded and the
 *               row is treated as newly significant right away
 *  flags        flag word of the column (lvalue, may be a local copy)
 *  flagsp       pointer to the column's flag word; flagsp[-1] and flagsp[1]
 *               are read for the sign context
 *  data,
 *  data_stride  the decoded sample is data[ci*data_stride]
 *  oneplushalf  magnitude stored for a newly significant sample; the sign is
 *               taken from the decoded sign bit XORed with opj_t1_getspb()
 *  vsc          forwarded to opj_t1_update_flags_macro()
 *
 * The do { } while(0) exists only so that `break` can leave the step when
 * the zero-coding decision is 0. Arguments are pasted unparenthesized and
 * evaluated several times; pass simple expressions.
 */
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1119
                                      flags, flagsp, flags_stride, data, \
1120
                                      data_stride, ci, mqc, curctx, \
1121
2.41G
                                      v, a, c, ct, oneplushalf, vsc) \
1122
2.41G
{ \
1123
2.41G
    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1124
590M
        do { \
1125
590M
            if( !partial ) { \
1126
580M
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1127
580M
                opj_t1_setcurctx(curctx, ctxt1); \
1128
580M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1129
580M
                if( !v ) \
1130
580M
                    break; \
1131
580M
            } \
1132
590M
            { \
1133
253M
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1134
253M
                                    flags, flagsp[-1], flagsp[1], \
1135
253M
                                    ci); \
1136
253M
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1137
253M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1138
253M
                v = v ^ opj_t1_getspb(lu); \
1139
253M
                data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1140
253M
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1141
253M
            } \
1142
253M
        } while(0); \
1143
590M
    } \
1144
2.41G
}
1145
1146
static void opj_t1_dec_clnpass_step(
1147
    opj_t1_t *t1,
1148
    opj_flag_t *flagsp,
1149
    OPJ_INT32 *datap,
1150
    OPJ_INT32 oneplushalf,
1151
    OPJ_UINT32 ci,
1152
    OPJ_UINT32 vsc)
1153
37.9M
{
1154
37.9M
    OPJ_UINT32 v;
1155
1156
37.9M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1157
37.9M
    opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1158
37.9M
                                  *flagsp, flagsp, t1->w + 2U, datap,
1159
37.9M
                                  0, ci, mqc, mqc->curctx,
1160
37.9M
                                  v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1161
37.9M
}
1162
1163
static void opj_t1_enc_clnpass(
1164
    opj_t1_t *t1,
1165
    OPJ_INT32 bpno,
1166
    OPJ_INT32 *nmsedec,
1167
    OPJ_UINT32 cblksty)
1168
9.56M
{
1169
9.56M
    OPJ_UINT32 i, k;
1170
9.56M
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1171
9.56M
    opj_mqc_t* mqc = &(t1->mqc);
1172
9.56M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1173
9.56M
    const OPJ_INT32* datap = t1->data;
1174
9.56M
    opj_flag_t *f = &T1_FLAGS(0, 0);
1175
9.56M
    const OPJ_UINT32 extra = 2U;
1176
1177
9.56M
    *nmsedec = 0;
1178
#ifdef DEBUG_ENC_CLN
1179
    printf("enc_clnpass: bpno=%d\n", bpno);
1180
#endif
1181
106M
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1182
#ifdef DEBUG_ENC_CLN
1183
        printf(" k=%d\n", k);
1184
#endif
1185
4.74G
        for (i = 0; i < t1->w; ++i, f++) {
1186
4.64G
            OPJ_UINT32 agg, runlen;
1187
#ifdef DEBUG_ENC_CLN
1188
            printf("  i=%d\n", i);
1189
#endif
1190
4.64G
            agg = !*f;
1191
#ifdef DEBUG_ENC_CLN
1192
            printf("   agg=%d\n", agg);
1193
#endif
1194
4.64G
            if (agg) {
1195
12.7G
                for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1196
10.2G
                    if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1197
27.8M
                        break;
1198
27.8M
                    }
1199
10.2G
                }
1200
2.56G
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1201
2.56G
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1202
2.56G
                if (runlen == 4) {
1203
2.53G
                    continue;
1204
2.53G
                }
1205
27.8M
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1206
27.8M
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1207
27.8M
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1208
2.07G
            } else {
1209
2.07G
                runlen = 0;
1210
2.07G
            }
1211
2.10G
            opj_t1_enc_clnpass_step_macro(
1212
2.10G
                mqc, curctx, a, c, ct,
1213
2.10G
                f,
1214
2.10G
                datap,
1215
2.10G
                bpno,
1216
2.10G
                one,
1217
2.10G
                nmsedec,
1218
2.10G
                agg,
1219
2.10G
                runlen,
1220
2.10G
                4U,
1221
2.10G
                cblksty);
1222
2.10G
            datap += 4 - runlen;
1223
2.10G
        }
1224
96.9M
    }
1225
9.56M
    if (k < t1->h) {
1226
1.00M
        const OPJ_UINT32 agg = 0;
1227
1.00M
        const OPJ_UINT32 runlen = 0;
1228
#ifdef DEBUG_ENC_CLN
1229
        printf(" k=%d\n", k);
1230
#endif
1231
35.4M
        for (i = 0; i < t1->w; ++i, f++) {
1232
#ifdef DEBUG_ENC_CLN
1233
            printf("  i=%d\n", i);
1234
            printf("   agg=%d\n", agg);
1235
#endif
1236
34.4M
            opj_t1_enc_clnpass_step_macro(
1237
34.4M
                mqc, curctx, a, c, ct,
1238
34.4M
                f,
1239
34.4M
                datap,
1240
34.4M
                bpno,
1241
34.4M
                one,
1242
34.4M
                nmsedec,
1243
34.4M
                agg,
1244
34.4M
                runlen,
1245
34.4M
                t1->h - k,
1246
34.4M
                cblksty);
1247
34.4M
            datap += t1->h - k;
1248
34.4M
        }
1249
1.00M
    }
1250
1251
9.56M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1252
9.56M
}
1253
1254
2.20M
/*
 * Body of the clean-up decoding pass, parameterized on the block geometry and
 * on `vsc` so that callers can pass compile-time constants.
 *
 *  t1           Tier-1 handle (data, flags and mqc members are used)
 *  bpno         bit-plane number; a sample that becomes significant gets the
 *               magnitude (1 << bpno) | ((1 << bpno) >> 1)
 *  vsc          handed to the step for row 0 of each stripe
 *  w, h         code-block width and height
 *  flags_stride number of flag entries per flag row
 *
 * Complete stripes: when the column's flag word is 0, one decision is decoded
 * in the T1_CTXNO_AGG context; 0 means the whole column stays untouched.
 * Otherwise two T1_CTXNO_UNI decisions give `runlen`, the first row to
 * become significant; the switch enters at that row with `partial` true (no
 * zero-coding decision) and falls through the remaining rows with `partial`
 * false. Columns with a non-zero flag word run the checked step on all four
 * rows. The flag word is then stored back with the four T1_PI bits cleared.
 *
 * The coder variables are written back with UPLOAD_MQC_VARIABLES before the
 * leftover rows (h not a multiple of 4), which go through
 * opj_t1_dec_clnpass_step() and thus work on t1->mqc directly.
 *
 * NOTE(review): the leftover-row loop passes `vsc` for every row j, whereas
 * the stripe code passes it for row 0 only; presumably the flag update only
 * looks at vsc when ci == 0 - confirm against opj_t1_update_flags_macro.
 *
 * The macro declares locals and therefore has to be the whole body of the
 * calling function.
 */
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1255
2.20M
{ \
1256
2.20M
    OPJ_INT32 one, half, oneplushalf; \
1257
2.20M
    OPJ_UINT32 runlen; \
1258
2.20M
    OPJ_UINT32 i, j, k; \
1259
2.20M
    const OPJ_UINT32 l_w = w; \
1260
2.20M
    opj_mqc_t* mqc = &(t1->mqc); \
1261
2.20M
    register OPJ_INT32 *data = t1->data; \
1262
2.20M
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1263
2.20M
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1264
2.20M
    register OPJ_UINT32 v; \
1265
2.20M
    one = 1 << bpno; \
1266
2.20M
    half = one >> 1; \
1267
2.20M
    oneplushalf = one | half; \
1268
27.5M
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1269
1.04G
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1270
1.02G
            opj_flag_t flags = *flagsp; \
1271
1.02G
            if (flags == 0) { \
1272
432M
                OPJ_UINT32 partial = OPJ_TRUE; \
1273
432M
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1274
432M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1275
432M
                if (!v) { \
1276
422M
                    continue; \
1277
422M
                } \
1278
432M
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1279
9.52M
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1280
9.52M
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1281
9.52M
                runlen = (runlen << 1) | v; \
1282
9.52M
                switch(runlen) { \
1283
2.99M
                    case 0: \
1284
2.99M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1285
2.99M
                                            flags, flagsp, flags_stride, data, \
1286
2.99M
                                            l_w, 0, mqc, curctx, \
1287
2.99M
                                            v, a, c, ct, oneplushalf, vsc); \
1288
2.99M
                        partial = OPJ_FALSE; \
1289
2.99M
                        /* FALLTHRU */ \
1290
5.44M
                    case 1: \
1291
5.44M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1292
5.44M
                                            flags, flagsp, flags_stride, data, \
1293
5.44M
                                            l_w, 1, mqc, curctx, \
1294
5.44M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1295
5.44M
                        partial = OPJ_FALSE; \
1296
5.44M
                        /* FALLTHRU */ \
1297
7.66M
                    case 2: \
1298
7.66M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1299
7.66M
                                            flags, flagsp, flags_stride, data, \
1300
7.66M
                                            l_w, 2, mqc, curctx, \
1301
7.66M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1302
7.66M
                        partial = OPJ_FALSE; \
1303
7.66M
                        /* FALLTHRU */ \
1304
9.52M
                    case 3: \
1305
9.52M
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1306
9.52M
                                            flags, flagsp, flags_stride, data, \
1307
9.52M
                                            l_w, 3, mqc, curctx, \
1308
9.52M
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
1309
9.52M
                        break; \
1310
9.52M
                } \
1311
588M
            } else { \
1312
588M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1313
588M
                                    flags, flagsp, flags_stride, data, \
1314
588M
                                    l_w, 0, mqc, curctx, \
1315
588M
                                    v, a, c, ct, oneplushalf, vsc); \
1316
588M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1317
588M
                                    flags, flagsp, flags_stride, data, \
1318
588M
                                    l_w, 1, mqc, curctx, \
1319
588M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1320
588M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1321
588M
                                    flags, flagsp, flags_stride, data, \
1322
588M
                                    l_w, 2, mqc, curctx, \
1323
588M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1324
588M
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1325
588M
                                    flags, flagsp, flags_stride, data, \
1326
588M
                                    l_w, 3, mqc, curctx, \
1327
588M
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
1328
588M
            } \
1329
1.02G
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1330
598M
        } \
1331
25.3M
    } \
1332
2.20M
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1333
2.20M
    if( k < h ) { \
1334
26.5M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1335
63.7M
            for (j = 0; j < h - k; ++j) { \
1336
37.9M
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1337
37.9M
            } \
1338
25.8M
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1339
25.8M
        } \
1340
727k
    } \
1341
2.20M
}
1342
1343
/**
 * Consume the segmentation symbol that follows a clean-up pass.
 *
 * When J2K_CCP_CBLKSTY_SEGSYM is set in cblksty, four decisions are decoded
 * in the T1_CTXNO_UNI context and assembled most-significant bit first. The
 * value is currently not validated: the check against 0xa ("Bad segmentation
 * symbol" warning) is disabled, so the only effect is to advance the decoder.
 *
 * @param t1      Tier-1 handle
 * @param cblksty code-block style flags
 */
static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
{
    opj_mqc_t* mqc;
    OPJ_UINT32 v, v2;
    OPJ_UINT32 bitno;

    if (!(cblksty & J2K_CCP_CBLKSTY_SEGSYM)) {
        return;
    }

    mqc = &(t1->mqc);
    v = 0;
    opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
    for (bitno = 0; bitno < 4U; ++bitno) {
        opj_mqc_decode(v2, mqc);
        v = (v << 1) | v2;
    }
    /*
    if (v!=0xa) {
        opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
    }
    */
}
1363
1364
static void opj_t1_dec_clnpass_64x64_novsc(
1365
    opj_t1_t *t1,
1366
    OPJ_INT32 bpno)
1367
407k
{
1368
407k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1369
407k
}
1370
1371
static void opj_t1_dec_clnpass_64x64_vsc(
1372
    opj_t1_t *t1,
1373
    OPJ_INT32 bpno)
1374
421k
{
1375
421k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1376
421k
}
1377
1378
static void opj_t1_dec_clnpass_generic_novsc(
1379
    opj_t1_t *t1,
1380
    OPJ_INT32 bpno)
1381
726k
{
1382
726k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1383
726k
                                t1->w + 2U);
1384
726k
}
1385
1386
static void opj_t1_dec_clnpass_generic_vsc(
1387
    opj_t1_t *t1,
1388
    OPJ_INT32 bpno)
1389
648k
{
1390
648k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1391
648k
                                t1->w + 2U);
1392
648k
}
1393
1394
static void opj_t1_dec_clnpass(
1395
    opj_t1_t *t1,
1396
    OPJ_INT32 bpno,
1397
    OPJ_INT32 cblksty)
1398
2.20M
{
1399
2.20M
    if (t1->w == 64 && t1->h == 64) {
1400
829k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1401
421k
            opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1402
421k
        } else {
1403
407k
            opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1404
407k
        }
1405
1.37M
    } else {
1406
1.37M
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1407
648k
            opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1408
726k
        } else {
1409
726k
            opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1410
726k
        }
1411
1.37M
    }
1412
2.20M
    opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1413
2.20M
}
1414
1415
1416
static OPJ_FLOAT64 opj_t1_getwmsedec(
1417
    OPJ_INT32 nmsedec,
1418
    OPJ_UINT32 compno,
1419
    OPJ_UINT32 level,
1420
    OPJ_UINT32 orient,
1421
    OPJ_INT32 bpno,
1422
    OPJ_UINT32 qmfbid,
1423
    OPJ_FLOAT64 stepsize,
1424
    OPJ_UINT32 numcomps,
1425
    const OPJ_FLOAT64 * mct_norms,
1426
    OPJ_UINT32 mct_numcomps)
1427
26.8M
{
1428
26.8M
    OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1429
26.8M
    OPJ_ARG_NOT_USED(numcomps);
1430
1431
26.8M
    if (mct_norms && (compno < mct_numcomps)) {
1432
281k
        w1 = mct_norms[compno];
1433
281k
    }
1434
1435
26.8M
    if (qmfbid == 1) {
1436
12.9M
        w2 = opj_dwt_getnorm(level, orient);
1437
13.8M
    } else {    /* if (qmfbid == 0) */
1438
13.8M
        const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1439
13.8M
                                    (orient == 3) ? 2 : 1;
1440
13.8M
        w2 = opj_dwt_getnorm_real(level, orient);
1441
        /* Not sure this is right. But preserves past behaviour */
1442
13.8M
        stepsize /= (1 << log2_gain);
1443
13.8M
    }
1444
1445
26.8M
    wmsedec = w1 * w2 * stepsize * (1 << bpno);
1446
26.8M
    wmsedec *= wmsedec * nmsedec / 8192.0;
1447
1448
26.8M
    return wmsedec;
1449
26.8M
}
1450
1451
static OPJ_BOOL opj_t1_allocate_buffers(
1452
    opj_t1_t *t1,
1453
    OPJ_UINT32 w,
1454
    OPJ_UINT32 h)
1455
40.2M
{
1456
40.2M
    OPJ_UINT32 flagssize;
1457
40.2M
    OPJ_UINT32 flags_stride;
1458
1459
    /* No risk of overflow. Prior checks ensure those assert are met */
1460
    /* They are per the specification */
1461
40.2M
    assert(w <= 1024);
1462
40.2M
    assert(h <= 1024);
1463
40.2M
    assert(w * h <= 4096);
1464
1465
    /* encoder uses tile buffer, so no need to allocate */
1466
40.2M
    {
1467
40.2M
        OPJ_UINT32 datasize = w * h;
1468
1469
40.2M
        if (datasize > t1->datasize) {
1470
127k
            opj_aligned_free(t1->data);
1471
127k
            t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1472
127k
            if (!t1->data) {
1473
                /* FIXME event manager error callback */
1474
0
                return OPJ_FALSE;
1475
0
            }
1476
127k
            t1->datasize = datasize;
1477
127k
        }
1478
        /* memset first arg is declared to never be null by gcc */
1479
40.2M
        if (t1->data != NULL) {
1480
40.2M
            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1481
40.2M
        }
1482
40.2M
    }
1483
1484
0
    flags_stride = w + 2U; /* can't be 0U */
1485
1486
40.2M
    flagssize = (h + 3U) / 4U + 2U;
1487
1488
40.2M
    flagssize *= flags_stride;
1489
40.2M
    {
1490
40.2M
        opj_flag_t* p;
1491
40.2M
        OPJ_UINT32 x;
1492
40.2M
        OPJ_UINT32 flags_height = (h + 3U) / 4U;
1493
1494
40.2M
        if (flagssize > t1->flagssize) {
1495
1496
1.42M
            opj_aligned_free(t1->flags);
1497
1.42M
            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1498
1.42M
                            opj_flag_t));
1499
1.42M
            if (!t1->flags) {
1500
                /* FIXME event manager error callback */
1501
0
                return OPJ_FALSE;
1502
0
            }
1503
1.42M
        }
1504
40.2M
        t1->flagssize = flagssize;
1505
1506
40.2M
        memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1507
1508
40.2M
        p = &t1->flags[0];
1509
757M
        for (x = 0; x < flags_stride; ++x) {
1510
            /* magic value to hopefully stop any passes being interested in this entry */
1511
717M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1512
717M
        }
1513
1514
40.2M
        p = &t1->flags[((flags_height + 1) * flags_stride)];
1515
757M
        for (x = 0; x < flags_stride; ++x) {
1516
            /* magic value to hopefully stop any passes being interested in this entry */
1517
717M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1518
717M
        }
1519
1520
40.2M
        if (h % 4) {
1521
16.3M
            OPJ_UINT32 v = 0;
1522
16.3M
            p = &t1->flags[((flags_height) * flags_stride)];
1523
16.3M
            if (h % 4 == 1) {
1524
12.4M
                v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1525
12.4M
            } else if (h % 4 == 2) {
1526
3.05M
                v |= T1_PI_2 | T1_PI_3;
1527
3.05M
            } else if (h % 4 == 3) {
1528
864k
                v |= T1_PI_3;
1529
864k
            }
1530
199M
            for (x = 0; x < flags_stride; ++x) {
1531
182M
                *p++ = v;
1532
182M
            }
1533
16.3M
        }
1534
40.2M
    }
1535
1536
0
    t1->w = w;
1537
40.2M
    t1->h = h;
1538
1539
40.2M
    return OPJ_TRUE;
1540
40.2M
}
1541
1542
/* ----------------------------------------------------------------------- */
1543
1544
/* ----------------------------------------------------------------------- */
1545
/**
1546
 * Creates a new Tier 1 handle
1547
 * and initializes the look-up tables of the Tier-1 coder/decoder
1548
 * @return a new T1 handle if successful, returns NULL otherwise
1549
*/
1550
opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1551
52.8k
{
1552
52.8k
    opj_t1_t *l_t1 = 00;
1553
1554
52.8k
    l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1555
52.8k
    if (!l_t1) {
1556
0
        return 00;
1557
0
    }
1558
1559
52.8k
    l_t1->encoder = isEncoder;
1560
1561
52.8k
    return l_t1;
1562
52.8k
}
1563
1564
1565
/**
1566
 * Destroys a previously created T1 handle
1567
 *
1568
 * @param p_t1 Tier 1 handle to destroy
1569
*/
1570
void opj_t1_destroy(opj_t1_t *p_t1)
1571
52.8k
{
1572
52.8k
    if (! p_t1) {
1573
0
        return;
1574
0
    }
1575
1576
52.8k
    if (p_t1->data) {
1577
52.7k
        opj_aligned_free(p_t1->data);
1578
52.7k
        p_t1->data = 00;
1579
52.7k
    }
1580
1581
52.8k
    if (p_t1->flags) {
1582
52.7k
        opj_aligned_free(p_t1->flags);
1583
52.7k
        p_t1->flags = 00;
1584
52.7k
    }
1585
1586
52.8k
    opj_free(p_t1->cblkdatabuffer);
1587
1588
52.8k
    opj_free(p_t1);
1589
52.8k
}
1590
1591
/**
 * Parameters of one asynchronous code-block decoding job.
 *
 * An instance is allocated and filled by opj_t1_decode_cblks() for every
 * code-block to decode, handed to the thread pool, and freed by
 * opj_t1_clbl_decode_processor() once the job is done (or has failed).
 */
typedef struct {
1592
    OPJ_BOOL whole_tile_decoding; /* copy of tcd->whole_tile_decoding; when false the processor decodes into a freshly allocated cblk->decoded_data */
1593
    OPJ_UINT32 resno; /* resolution index of the band; resolutions[resno - 1] gives the offset applied for bands with bandno bit 0 or 1 set */
1594
    opj_tcd_cblk_dec_t* cblk; /* code-block to decode */
1595
    opj_tcd_band_t* band; /* band containing the code-block (bandno, origin and stepsize are read) */
1596
    opj_tcd_tilecomp_t* tilec; /* tile component whose data buffer receives the samples in whole-tile mode */
1597
    opj_tccp_t* tccp; /* coding parameters read by the processor: cblksty, roishift, qmfbid */
1598
    OPJ_BOOL mustuse_cblkdatabuffer; /* set when the thread pool has more than one thread; copied into t1->mustuse_cblkdatabuffer */
1599
    volatile OPJ_BOOL* pret; /* shared result flag: checked before decoding, set to OPJ_FALSE on failure */
1600
    opj_event_mgr_t *p_manager; /* event manager used for error/warning messages */
1601
    opj_mutex_t* p_manager_mutex; /* when non-NULL, locked around calls to opj_event_msg() */
1602
    OPJ_BOOL check_pterm; /* forwarded to the code-block decoder; enables the PTERM check warnings */
1603
} opj_t1_cblk_decode_processing_job_t;
1604
1605
static void opj_t1_destroy_wrapper(void* t1)
1606
52.8k
{
1607
52.8k
    opj_t1_destroy((opj_t1_t*) t1);
1608
52.8k
}
1609
1610
/**
 * Thread-pool job that decodes one code-block and post-processes its samples.
 *
 * Steps:
 *  - when not decoding the whole tile, allocate and zero cblk->decoded_data;
 *  - fetch (or lazily create and register) the per-thread T1 handle;
 *  - run the HT or the regular code-block decoder depending on tccp->cblksty;
 *  - undo the ROI shift when tccp->roishift is non zero;
 *  - halve the samples (qmfbid == 1) or multiply them by half the band step
 *    size and store them as floats (otherwise), either in place in
 *    cblk->decoded_data or into the tile component buffer.
 *
 * The job is always freed before returning. On failure *(job->pret) is set
 * to OPJ_FALSE. Every message sent to the event manager is emitted with
 * job->p_manager_mutex held (when the mutex is non-NULL), because several
 * instances of this function may report through the same manager.
 *
 * @param user_data Pointer to a opj_t1_cblk_decode_processing_job_t structure
 * @param tls       TLS handle used to store the per-thread T1 handle
 */
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
{
    opj_tcd_cblk_dec_t* cblk;
    opj_tcd_band_t* band;
    opj_tcd_tilecomp_t* tilec;
    opj_tccp_t* tccp;
    OPJ_INT32* OPJ_RESTRICT datap;
    OPJ_UINT32 cblk_w, cblk_h;
    OPJ_INT32 x, y;
    OPJ_UINT32 i, j;
    opj_t1_cblk_decode_processing_job_t* job;
    opj_t1_t* t1;
    OPJ_UINT32 resno;
    OPJ_UINT32 tile_w;

    job = (opj_t1_cblk_decode_processing_job_t*) user_data;

    cblk = job->cblk;

    if (!job->whole_tile_decoding) {
        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);

        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
                             cblk_w * cblk_h);
        if (cblk->decoded_data == NULL) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate cblk->decoded_data\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        /* Zero-init required */
        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
    } else if (cblk->decoded_data) {
        /* Not sure if that code path can happen, but better be */
        /* safe than sorry */
        opj_aligned_free(cblk->decoded_data);
        cblk->decoded_data = NULL;
    }

    resno = job->resno;
    band = job->band;
    tilec = job->tilec;
    tccp = job->tccp;
    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
                          -
                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);

    /* Another job already failed: nothing more to do */
    if (!*(job->pret)) {
        opj_free(job);
        return;
    }

    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
    if (t1 == NULL) {
        t1 = opj_t1_create(OPJ_FALSE);
        if (t1 == NULL) {
            /* Take the manager mutex like every other message emitted */
            /* from a worker thread */
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate Tier 1 handle\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Unable to set t1 handle as TLS\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            /* The handle was not registered, so it must be freed here */
            opj_t1_destroy(t1);
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }
    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;

    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    } else {
        if (OPJ_FALSE == opj_t1_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }

    /* Position of the code-block relative to its band, then offset by the */
    /* size of the previous resolution for bands with bandno bit 0 / bit 1 set */
    x = cblk->x0 - band->x0;
    y = cblk->y0 - band->y0;
    if (band->bandno & 1) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        x += pres->x1 - pres->x0;
    }
    if (band->bandno & 2) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        y += pres->y1 - pres->y0;
    }

    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
    cblk_w = t1->w;
    cblk_h = t1->h;

    if (tccp->roishift) {
        if (tccp->roishift >= 31) {
            /* A shift of 31 bits or more leaves nothing: clear the block */
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    datap[(j * cblk_w) + i] = 0;
                }
            }
        } else {
            OPJ_INT32 thresh = 1 << tccp->roishift;
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    OPJ_INT32 val = datap[(j * cblk_w) + i];
                    OPJ_INT32 mag = abs(val);
                    if (mag >= thresh) {
                        mag >>= tccp->roishift;
                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
                    }
                }
            }
        }
    }

    /* Both can be non NULL if for example decoding a full tile and then */
    /* partially a tile. In which case partial decoding should be the */
    /* priority */
    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));

    if (cblk->decoded_data) {
        /* In-place post-processing of the per-code-block buffer */
        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
        if (tccp->qmfbid == 1) {
            for (i = 0; i < cblk_size; ++i) {
                datap[i] /= 2;
            }
        } else {        /* if (tccp->qmfbid == 0) */
            const float stepsize = 0.5f * band->stepsize;
            i = 0;
#ifdef __SSE2__
            {
                const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
                for (; i < (cblk_size & ~15U); i += 16) {
                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 0)));
                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 4)));
                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 8)));
                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 12)));
                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
                    datap += 16;
                }
            }
#endif
            /* Scalar tail (or whole block without SSE2): the float result */
            /* overwrites the integer sample through memcpy */
            for (; i < cblk_size; ++i) {
                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
                memcpy(datap, &tmp, sizeof(tmp));
                datap++;
            }
        }
    } else if (tccp->qmfbid == 1) {
        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
                                                       (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            /* positive -> round down aka.  (83)/2 =  41.5 ->  41 */
            /* negative -> round up   aka. (-83)/2 = -41.5 -> -41 */
#if defined(__AVX512F__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 16; ++i) {
                __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
                const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
                in_avx = _mm512_add_epi32(in_avx, add_avx);
                _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
                ptr_in += 16;
                ptr_out += 16;
            }

            for (i = 0; i < cblk_w % 16; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#elif defined(__AVX2__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 8; ++i) {
                __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
                const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
                in_avx = _mm256_add_epi32(in_avx, add_avx);
                _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
                ptr_in += 8;
                ptr_out += 8;
            }

            for (i = 0; i < cblk_w % 8; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#else
            i = 0;
            for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
            }
            for (; i < cblk_w; ++i) {
                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
            }
#endif
        }
    } else {        /* if (tccp->qmfbid == 0) */
        const float stepsize = 0.5f * band->stepsize;
        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
                                                         tile_w + (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
            for (i = 0; i < cblk_w; ++i) {
                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
                *tiledp2 = tmp;
                datap++;
                tiledp2++;
            }
            tiledp += tile_w;
        }
    }

    opj_free(job);
}
1870
1871
1872
void opj_t1_decode_cblks(opj_tcd_t* tcd,
1873
                         volatile OPJ_BOOL* pret,
1874
                         opj_tcd_tilecomp_t* tilec,
1875
                         opj_tccp_t* tccp,
1876
                         opj_event_mgr_t *p_manager,
1877
                         opj_mutex_t* p_manager_mutex,
1878
                         OPJ_BOOL check_pterm
1879
                        )
1880
238k
{
1881
238k
    opj_thread_pool_t* tp = tcd->thread_pool;
1882
238k
    OPJ_UINT32 resno, bandno, precno, cblkno;
1883
1884
#ifdef DEBUG_VERBOSE
1885
    OPJ_UINT32 codeblocks_decoded = 0;
1886
    printf("Enter opj_t1_decode_cblks()\n");
1887
#endif
1888
1889
2.32M
    for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1890
2.09M
        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1891
1892
7.88M
        for (bandno = 0; bandno < res->numbands; ++bandno) {
1893
5.79M
            opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1894
1895
43.9M
            for (precno = 0; precno < res->pw * res->ph; ++precno) {
1896
38.1M
                opj_tcd_precinct_t* precinct = &band->precincts[precno];
1897
1898
38.1M
                if (!opj_tcd_is_subband_area_of_interest(tcd,
1899
38.1M
                        tilec->compno,
1900
38.1M
                        resno,
1901
38.1M
                        band->bandno,
1902
38.1M
                        (OPJ_UINT32)precinct->x0,
1903
38.1M
                        (OPJ_UINT32)precinct->y0,
1904
38.1M
                        (OPJ_UINT32)precinct->x1,
1905
38.1M
                        (OPJ_UINT32)precinct->y1)) {
1906
21.6M
                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1907
10.7M
                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1908
10.7M
                        if (cblk->decoded_data) {
1909
#ifdef DEBUG_VERBOSE
1910
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1911
                                   cblk->x0, cblk->y0, resno, bandno);
1912
#endif
1913
0
                            opj_aligned_free(cblk->decoded_data);
1914
0
                            cblk->decoded_data = NULL;
1915
0
                        }
1916
10.7M
                    }
1917
10.9M
                    continue;
1918
10.9M
                }
1919
1920
73.0M
                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1921
45.8M
                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1922
45.8M
                    opj_t1_cblk_decode_processing_job_t* job;
1923
1924
45.8M
                    if (!opj_tcd_is_subband_area_of_interest(tcd,
1925
45.8M
                            tilec->compno,
1926
45.8M
                            resno,
1927
45.8M
                            band->bandno,
1928
45.8M
                            (OPJ_UINT32)cblk->x0,
1929
45.8M
                            (OPJ_UINT32)cblk->y0,
1930
45.8M
                            (OPJ_UINT32)cblk->x1,
1931
45.8M
                            (OPJ_UINT32)cblk->y1)) {
1932
2.75M
                        if (cblk->decoded_data) {
1933
#ifdef DEBUG_VERBOSE
1934
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1935
                                   cblk->x0, cblk->y0, resno, bandno);
1936
#endif
1937
0
                            opj_aligned_free(cblk->decoded_data);
1938
0
                            cblk->decoded_data = NULL;
1939
0
                        }
1940
2.75M
                        continue;
1941
2.75M
                    }
1942
1943
43.1M
                    if (!tcd->whole_tile_decoding) {
1944
26.3M
                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1945
26.3M
                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1946
26.3M
                        if (cblk->decoded_data != NULL) {
1947
#ifdef DEBUG_VERBOSE
1948
                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1949
                                   cblk->x0, cblk->y0, resno, bandno);
1950
#endif
1951
0
                            continue;
1952
0
                        }
1953
26.3M
                        if (cblk_w == 0 || cblk_h == 0) {
1954
402k
                            continue;
1955
402k
                        }
1956
#ifdef DEBUG_VERBOSE
1957
                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1958
                               cblk->x0, cblk->y0, resno, bandno);
1959
#endif
1960
26.3M
                    }
1961
1962
42.6M
                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1963
42.6M
                            sizeof(opj_t1_cblk_decode_processing_job_t));
1964
42.6M
                    if (!job) {
1965
0
                        *pret = OPJ_FALSE;
1966
0
                        return;
1967
0
                    }
1968
42.6M
                    job->whole_tile_decoding = tcd->whole_tile_decoding;
1969
42.6M
                    job->resno = resno;
1970
42.6M
                    job->cblk = cblk;
1971
42.6M
                    job->band = band;
1972
42.6M
                    job->tilec = tilec;
1973
42.6M
                    job->tccp = tccp;
1974
42.6M
                    job->pret = pret;
1975
42.6M
                    job->p_manager_mutex = p_manager_mutex;
1976
42.6M
                    job->p_manager = p_manager;
1977
42.6M
                    job->check_pterm = check_pterm;
1978
42.6M
                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1979
42.6M
                    opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1980
#ifdef DEBUG_VERBOSE
1981
                    codeblocks_decoded ++;
1982
#endif
1983
42.6M
                    if (!(*pret)) {
1984
2.34k
                        return;
1985
2.34k
                    }
1986
42.6M
                } /* cblkno */
1987
27.1M
            } /* precno */
1988
5.79M
        } /* bandno */
1989
2.09M
    } /* resno */
1990
1991
#ifdef DEBUG_VERBOSE
1992
    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1993
#endif
1994
235k
    return;
1995
238k
}
1996
1997
1998
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1999
                                   opj_tcd_cblk_dec_t* cblk,
2000
                                   OPJ_UINT32 orient,
2001
                                   OPJ_UINT32 roishift,
2002
                                   OPJ_UINT32 cblksty,
2003
                                   opj_event_mgr_t *p_manager,
2004
                                   opj_mutex_t* p_manager_mutex,
2005
                                   OPJ_BOOL check_pterm)
2006
37.6M
{
2007
37.6M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2008
2009
37.6M
    OPJ_INT32 bpno_plus_one;
2010
37.6M
    OPJ_UINT32 passtype;
2011
37.6M
    OPJ_UINT32 segno, passno;
2012
37.6M
    OPJ_BYTE* cblkdata = NULL;
2013
37.6M
    OPJ_UINT32 cblkdataindex = 0;
2014
37.6M
    OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
2015
37.6M
    OPJ_INT32* original_t1_data = NULL;
2016
2017
37.6M
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2018
2019
37.6M
    if (!opj_t1_allocate_buffers(
2020
37.6M
                t1,
2021
37.6M
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2022
37.6M
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2023
0
        return OPJ_FALSE;
2024
0
    }
2025
2026
37.6M
    bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
2027
37.6M
    if (bpno_plus_one >= 31) {
2028
307
        if (p_manager_mutex) {
2029
307
            opj_mutex_lock(p_manager_mutex);
2030
307
        }
2031
307
        opj_event_msg(p_manager, EVT_WARNING,
2032
307
                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
2033
307
                      bpno_plus_one);
2034
307
        if (p_manager_mutex) {
2035
307
            opj_mutex_unlock(p_manager_mutex);
2036
307
        }
2037
307
        return OPJ_FALSE;
2038
307
    }
2039
37.6M
    passtype = 2;
2040
2041
37.6M
    opj_mqc_resetstates(mqc);
2042
37.6M
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2043
37.6M
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2044
37.6M
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2045
2046
37.6M
    if (cblk->corrupted) {
2047
0
        assert(cblk->numchunks == 0);
2048
0
        return OPJ_TRUE;
2049
0
    }
2050
2051
    /* Even if we have a single chunk, in multi-threaded decoding */
2052
    /* the insertion of our synthetic marker might potentially override */
2053
    /* valid codestream of other codeblocks decoded in parallel. */
2054
37.6M
    if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
2055
49.5k
                                cblk->numchunks > 0)) {
2056
49.5k
        OPJ_UINT32 i;
2057
49.5k
        OPJ_UINT32 cblk_len;
2058
2059
        /* Compute whole codeblock length from chunk lengths */
2060
49.5k
        cblk_len = 0;
2061
315k
        for (i = 0; i < cblk->numchunks; i++) {
2062
266k
            cblk_len += cblk->chunks[i].len;
2063
266k
        }
2064
2065
        /* Allocate temporary memory if needed */
2066
49.5k
        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
2067
13.8k
            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
2068
13.8k
                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
2069
13.8k
            if (cblkdata == NULL) {
2070
0
                return OPJ_FALSE;
2071
0
            }
2072
13.8k
            t1->cblkdatabuffer = cblkdata;
2073
13.8k
            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2074
13.8k
            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2075
13.8k
        }
2076
2077
        /* Concatenate all chunks */
2078
49.5k
        cblkdata = t1->cblkdatabuffer;
2079
49.5k
        cblk_len = 0;
2080
315k
        for (i = 0; i < cblk->numchunks; i++) {
2081
266k
            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2082
266k
            cblk_len += cblk->chunks[i].len;
2083
266k
        }
2084
37.6M
    } else if (cblk->numchunks == 1) {
2085
438k
        cblkdata = cblk->chunks[0].data;
2086
37.1M
    } else {
2087
        /* Not sure if that can happen in practice, but avoid Coverity to */
2088
        /* think we will dereference a null cblkdta pointer */
2089
37.1M
        return OPJ_TRUE;
2090
37.1M
    }
2091
2092
    /* For subtile decoding, directly decode in the decoded_data buffer of */
2093
    /* the code-block. Hack t1->data to point to it, and restore it later */
2094
487k
    if (cblk->decoded_data) {
2095
249k
        original_t1_data = t1->data;
2096
249k
        t1->data = cblk->decoded_data;
2097
249k
    }
2098
2099
1.14M
    for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2100
660k
        opj_tcd_seg_t *seg = &cblk->segs[segno];
2101
2102
        /* BYPASS mode */
2103
660k
        type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2104
599k
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2105
2106
660k
        if (type == T1_TYPE_RAW) {
2107
61.0k
            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2108
61.0k
                                 OPJ_COMMON_CBLK_DATA_EXTRA);
2109
599k
        } else {
2110
599k
            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2111
599k
                             OPJ_COMMON_CBLK_DATA_EXTRA);
2112
599k
        }
2113
660k
        cblkdataindex += seg->len;
2114
2115
6.47M
        for (passno = 0; (passno < seg->real_num_passes) &&
2116
5.89M
                (bpno_plus_one >= 1); ++passno) {
2117
5.81M
            switch (passtype) {
2118
1.85M
            case 0:
2119
1.85M
                if (type == T1_TYPE_RAW) {
2120
31.8k
                    opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2121
1.82M
                } else {
2122
1.82M
                    opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2123
1.82M
                }
2124
1.85M
                break;
2125
1.74M
            case 1:
2126
1.74M
                if (type == T1_TYPE_RAW) {
2127
30.7k
                    opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2128
1.71M
                } else {
2129
1.71M
                    opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2130
1.71M
                }
2131
1.74M
                break;
2132
2.20M
            case 2:
2133
2.20M
                opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2134
2.20M
                break;
2135
5.81M
            }
2136
2137
5.81M
            if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2138
3.82M
                opj_mqc_resetstates(mqc);
2139
3.82M
                opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2140
3.82M
                opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2141
3.82M
                opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2142
3.82M
            }
2143
5.81M
            if (++passtype == 3) {
2144
2.20M
                passtype = 0;
2145
2.20M
                bpno_plus_one--;
2146
2.20M
            }
2147
5.81M
        }
2148
2149
660k
        opq_mqc_finish_dec(mqc);
2150
660k
    }
2151
2152
487k
    if (check_pterm) {
2153
157k
        if (mqc->bp + 2 < mqc->end) {
2154
12.7k
            if (p_manager_mutex) {
2155
12.7k
                opj_mutex_lock(p_manager_mutex);
2156
12.7k
            }
2157
12.7k
            opj_event_msg(p_manager, EVT_WARNING,
2158
12.7k
                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2159
12.7k
                          (int)(mqc->end - mqc->bp) - 2,
2160
12.7k
                          (int)(mqc->bp - mqc->start),
2161
12.7k
                          (int)(mqc->end - mqc->start));
2162
12.7k
            if (p_manager_mutex) {
2163
12.7k
                opj_mutex_unlock(p_manager_mutex);
2164
12.7k
            }
2165
145k
        } else if (mqc->end_of_byte_stream_counter > 2) {
2166
124k
            if (p_manager_mutex) {
2167
124k
                opj_mutex_lock(p_manager_mutex);
2168
124k
            }
2169
124k
            opj_event_msg(p_manager, EVT_WARNING,
2170
124k
                          "PTERM check failure: %d synthesized 0xFF markers read\n",
2171
124k
                          mqc->end_of_byte_stream_counter);
2172
124k
            if (p_manager_mutex) {
2173
124k
                opj_mutex_unlock(p_manager_mutex);
2174
124k
            }
2175
124k
        }
2176
157k
    }
2177
2178
    /* Restore original t1->data is needed */
2179
487k
    if (cblk->decoded_data) {
2180
249k
        t1->data = original_t1_data;
2181
249k
    }
2182
2183
487k
    return OPJ_TRUE;
2184
487k
}
2185
2186
2187
/**
 * Parameters of one asynchronous code-block encoding job, consumed by
 * opj_t1_cblk_encode_processor().
 */
typedef struct {
2188
    OPJ_UINT32 compno; /* presumably the component index of tilec - TODO confirm against the code that fills the job */
2189
    OPJ_UINT32 resno; /* resolution index of the band; resolutions[resno - 1] gives the offset applied for bands with bandno bit 0 or 1 set */
2190
    opj_tcd_cblk_enc_t* cblk; /* code-block to encode */
2191
    opj_tcd_tile_t *tile; /* NOTE(review): presumably the tile owning the code-block - confirm against the processor body */
2192
    opj_tcd_band_t* band; /* band containing the code-block (bandno and origin are read) */
2193
    opj_tcd_tilecomp_t* tilec; /* tile component whose data buffer provides the samples */
2194
    opj_tccp_t* tccp; /* coding parameters of the tile component (qmfbid is read) */
2195
    const OPJ_FLOAT64 * mct_norms; /* NOTE(review): looks like MCT norms - confirm usage in the processor body */
2196
    OPJ_UINT32 mct_numcomps; /* presumably the number of entries in mct_norms - TODO confirm */
2197
    volatile OPJ_BOOL* pret; /* shared result flag: checked before encoding, set to OPJ_FALSE on failure */
2198
    opj_mutex_t* mutex; /* NOTE(review): mutex shared between jobs; what it protects is not shown here - confirm */
2199
} opj_t1_cblk_encode_processing_job_t;
2200
2201
/** Procedure to deal with a asynchronous code-block encoding job.
2202
 *
2203
 * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2204
 * @param tls       TLS handle.
2205
 */
2206
static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
2207
2.54M
{
2208
2.54M
    opj_t1_cblk_encode_processing_job_t* job =
2209
2.54M
        (opj_t1_cblk_encode_processing_job_t*)user_data;
2210
2.54M
    opj_tcd_cblk_enc_t* cblk = job->cblk;
2211
2.54M
    const opj_tcd_band_t* band = job->band;
2212
2.54M
    const opj_tcd_tilecomp_t* tilec = job->tilec;
2213
2.54M
    const opj_tccp_t* tccp = job->tccp;
2214
2.54M
    const OPJ_UINT32 resno = job->resno;
2215
2.54M
    opj_t1_t* t1;
2216
2.54M
    const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2217
2218
2.54M
    OPJ_INT32* OPJ_RESTRICT tiledp;
2219
2.54M
    OPJ_UINT32 cblk_w;
2220
2.54M
    OPJ_UINT32 cblk_h;
2221
2.54M
    OPJ_UINT32 i, j;
2222
2223
2.54M
    OPJ_INT32 x = cblk->x0 - band->x0;
2224
2.54M
    OPJ_INT32 y = cblk->y0 - band->y0;
2225
2226
2.54M
    if (!*(job->pret)) {
2227
0
        opj_free(job);
2228
0
        return;
2229
0
    }
2230
2231
2.54M
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2232
2.54M
    if (t1 == NULL) {
2233
12.8k
        t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2234
12.8k
        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2235
12.8k
    }
2236
2237
2.54M
    if (band->bandno & 1) {
2238
1.67M
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2239
1.67M
        x += pres->x1 - pres->x0;
2240
1.67M
    }
2241
2.54M
    if (band->bandno & 2) {
2242
1.67M
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2243
1.67M
        y += pres->y1 - pres->y0;
2244
1.67M
    }
2245
2246
2.54M
    if (!opj_t1_allocate_buffers(
2247
2.54M
                t1,
2248
2.54M
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2249
2.54M
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2250
0
        *(job->pret) = OPJ_FALSE;
2251
0
        opj_free(job);
2252
0
        return;
2253
0
    }
2254
2255
2.54M
    cblk_w = t1->w;
2256
2.54M
    cblk_h = t1->h;
2257
2258
2.54M
    tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2259
2260
2.54M
    if (tccp->qmfbid == 1) {
2261
        /* Do multiplication on unsigned type, even if the
2262
            * underlying type is signed, to avoid potential
2263
            * int overflow on large value (the output will be
2264
            * incorrect in such situation, but whatever...)
2265
            * This assumes complement-to-2 signed integer
2266
            * representation
2267
            * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2268
            */
2269
1.10M
        OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2270
1.10M
        OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2271
        /* Change from "natural" order to "zigzag" order of T1 passes */
2272
15.4M
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2273
#if defined(__AVX512F__)
2274
            const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
2275
            const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
2276
            OPJ_UINT32* ptr = tiledp_u;
2277
            for (i = 0; i < cblk_w / 16; ++i) {
2278
                //                      INPUT                                        OUTPUT
2279
                // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F   00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
2280
                // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F   04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
2281
                // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F   08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
2282
                // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F   0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
2283
                __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2284
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2285
                __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2286
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2287
                __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2288
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2289
                __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2290
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2291
2292
                __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
2293
                __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
2294
                __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
2295
                __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
2296
2297
                in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
2298
                in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
2299
                in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
2300
                in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
2301
2302
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
2303
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
2304
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
2305
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
2306
2307
                tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
2308
                tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
2309
2310
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
2311
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
2312
                _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
2313
                                    0x1));
2314
                _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
2315
                                    0x1));
2316
                _mm256_storeu_si256((__m256i*)(t1data + 48),
2317
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
2318
                _mm256_storeu_si256((__m256i*)(t1data + 56),
2319
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
2320
                t1data += 64;
2321
                ptr += 16;
2322
            }
2323
            for (i = 0; i < cblk_w % 16; ++i) {
2324
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2325
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2326
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2327
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2328
                t1data += 4;
2329
                ptr += 1;
2330
            }
2331
#elif defined(__AVX2__)
2332
            OPJ_UINT32* ptr = tiledp_u;
2333
            for (i = 0; i < cblk_w / 8; ++i) {
2334
                //          INPUT                  OUTPUT
2335
                // 00 01 02 03 04 05 06 07   00 10 20 30 01 11 21 31
2336
                // 10 11 12 13 14 15 16 17   02 12 22 32 03 13 23 33
2337
                // 20 21 22 23 24 25 26 27   04 14 24 34 05 15 25 35
2338
                // 30 31 32 33 34 35 36 37   06 16 26 36 07 17 27 37
2339
                __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2340
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2341
                __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2342
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2343
                __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2344
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2345
                __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2346
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2347
2348
                __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
2349
                __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
2350
                __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
2351
                __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
2352
2353
                in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
2354
                in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
2355
                in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
2356
                in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
2357
2358
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
2359
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
2360
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
2361
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
2362
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
2363
                                    0x31));
2364
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
2365
                                    0x31));
2366
                t1data += 32;
2367
                ptr += 8;
2368
            }
2369
            for (i = 0; i < cblk_w % 8; ++i) {
2370
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2371
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2372
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2373
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2374
                t1data += 4;
2375
                ptr += 1;
2376
            }
2377
#else
2378
852M
            for (i = 0; i < cblk_w; ++i) {
2379
838M
                t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2380
838M
                t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2381
838M
                t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2382
838M
                t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2383
838M
                t1data += 4;
2384
838M
            }
2385
14.3M
#endif
2386
14.3M
        }
2387
1.10M
        if (j < cblk_h) {
2388
8.67M
            for (i = 0; i < cblk_w; ++i) {
2389
8.47M
                OPJ_UINT32 k;
2390
23.8M
                for (k = j; k < cblk_h; k++) {
2391
15.4M
                    t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2392
15.4M
                    t1data ++;
2393
15.4M
                }
2394
8.47M
            }
2395
199k
        }
2396
1.43M
    } else {        /* if (tccp->qmfbid == 0) */
2397
1.43M
        OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2398
1.43M
        OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2399
        /* Change from "natural" order to "zigzag" order of T1 passes */
2400
12.4M
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2401
351M
            for (i = 0; i < cblk_w; ++i) {
2402
340M
                t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2403
340M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2404
340M
                t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2405
340M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2406
340M
                t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2407
340M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2408
340M
                t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2409
340M
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2410
340M
                t1data += 4;
2411
340M
            }
2412
11.0M
        }
2413
1.43M
        if (j < cblk_h) {
2414
1.80M
            for (i = 0; i < cblk_w; ++i) {
2415
1.74M
                OPJ_UINT32 k;
2416
5.85M
                for (k = j; k < cblk_h; k++) {
2417
4.11M
                    t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2418
4.11M
                                                      * (1 << T1_NMSEDEC_FRACBITS));
2419
4.11M
                    t1data ++;
2420
4.11M
                }
2421
1.74M
            }
2422
59.9k
        }
2423
1.43M
    }
2424
2425
2.54M
    {
2426
2.54M
        OPJ_FLOAT64 cumwmsedec =
2427
2.54M
            opj_t1_encode_cblk(
2428
2.54M
                t1,
2429
2.54M
                cblk,
2430
2.54M
                band->bandno,
2431
2.54M
                job->compno,
2432
2.54M
                tilec->numresolutions - 1 - resno,
2433
2.54M
                tccp->qmfbid,
2434
2.54M
                band->stepsize,
2435
2.54M
                tccp->cblksty,
2436
2.54M
                job->tile->numcomps,
2437
2.54M
                job->mct_norms,
2438
2.54M
                job->mct_numcomps);
2439
2.54M
        if (job->mutex) {
2440
2.54M
            opj_mutex_lock(job->mutex);
2441
2.54M
        }
2442
2.54M
        job->tile->distotile += cumwmsedec;
2443
2.54M
        if (job->mutex) {
2444
2.54M
            opj_mutex_unlock(job->mutex);
2445
2.54M
        }
2446
2.54M
    }
2447
2448
2.54M
    opj_free(job);
2449
2.54M
}
2450
2451
2452
OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2453
                             opj_tcd_tile_t *tile,
2454
                             opj_tcp_t *tcp,
2455
                             const OPJ_FLOAT64 * mct_norms,
2456
                             OPJ_UINT32 mct_numcomps
2457
                            )
2458
12.8k
{
2459
12.8k
    volatile OPJ_BOOL ret = OPJ_TRUE;
2460
12.8k
    opj_thread_pool_t* tp = tcd->thread_pool;
2461
12.8k
    OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2462
12.8k
    opj_mutex_t* mutex = opj_mutex_create();
2463
2464
12.8k
    tile->distotile = 0;
2465
2466
27.1k
    for (compno = 0; compno < tile->numcomps; ++compno) {
2467
14.3k
        opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2468
14.3k
        opj_tccp_t* tccp = &tcp->tccps[compno];
2469
2470
80.5k
        for (resno = 0; resno < tilec->numresolutions; ++resno) {
2471
66.2k
            opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2472
2473
236k
            for (bandno = 0; bandno < res->numbands; ++bandno) {
2474
170k
                opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2475
2476
                /* Skip empty bands */
2477
170k
                if (opj_tcd_is_band_empty(band)) {
2478
0
                    continue;
2479
0
                }
2480
452k
                for (precno = 0; precno < res->pw * res->ph; ++precno) {
2481
282k
                    opj_tcd_precinct_t *prc = &band->precincts[precno];
2482
2483
2.82M
                    for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2484
2.54M
                        opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2485
2486
2.54M
                        opj_t1_cblk_encode_processing_job_t* job =
2487
2.54M
                            (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2488
2.54M
                                    sizeof(opj_t1_cblk_encode_processing_job_t));
2489
2.54M
                        if (!job) {
2490
0
                            ret = OPJ_FALSE;
2491
0
                            goto end;
2492
0
                        }
2493
2.54M
                        job->compno = compno;
2494
2.54M
                        job->tile = tile;
2495
2.54M
                        job->resno = resno;
2496
2.54M
                        job->cblk = cblk;
2497
2.54M
                        job->band = band;
2498
2.54M
                        job->tilec = tilec;
2499
2.54M
                        job->tccp = tccp;
2500
2.54M
                        job->mct_norms = mct_norms;
2501
2.54M
                        job->mct_numcomps = mct_numcomps;
2502
2.54M
                        job->pret = &ret;
2503
2.54M
                        job->mutex = mutex;
2504
2.54M
                        opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
2505
2506
2.54M
                    } /* cblkno */
2507
282k
                } /* precno */
2508
170k
            } /* bandno */
2509
66.2k
        } /* resno  */
2510
14.3k
    } /* compno  */
2511
2512
12.8k
end:
2513
12.8k
    opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2514
12.8k
    if (mutex) {
2515
12.8k
        opj_mutex_destroy(mutex);
2516
12.8k
    }
2517
2518
12.8k
    return ret;
2519
12.8k
}
2520
2521
/* Returns whether the pass (bpno, passtype) is terminated */
2522
static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2523
                                   OPJ_UINT32 cblksty,
2524
                                   OPJ_INT32 bpno,
2525
                                   OPJ_UINT32 passtype)
2526
26.8M
{
2527
    /* Is it the last cleanup pass ? */
2528
26.8M
    if (passtype == 2 && bpno == 0) {
2529
943k
        return OPJ_TRUE;
2530
943k
    }
2531
2532
25.8M
    if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2533
0
        return OPJ_TRUE;
2534
0
    }
2535
2536
25.8M
    if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2537
        /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2538
0
        if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2539
0
            return OPJ_TRUE;
2540
0
        }
2541
        /* and beyond terminate all the magnitude refinement passes (in raw) */
2542
        /* and cleanup passes (in MQC) */
2543
0
        if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2544
0
            return OPJ_TRUE;
2545
0
        }
2546
0
    }
2547
2548
25.8M
    return OPJ_FALSE;
2549
25.8M
}
2550
2551
2552
static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2553
                                      opj_tcd_cblk_enc_t* cblk,
2554
                                      OPJ_UINT32 orient,
2555
                                      OPJ_UINT32 compno,
2556
                                      OPJ_UINT32 level,
2557
                                      OPJ_UINT32 qmfbid,
2558
                                      OPJ_FLOAT64 stepsize,
2559
                                      OPJ_UINT32 cblksty,
2560
                                      OPJ_UINT32 numcomps,
2561
                                      const OPJ_FLOAT64 * mct_norms,
2562
                                      OPJ_UINT32 mct_numcomps)
2563
2.54M
{
2564
2.54M
    OPJ_FLOAT64 cumwmsedec = 0.0;
2565
2566
2.54M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2567
2568
2.54M
    OPJ_UINT32 passno;
2569
2.54M
    OPJ_INT32 bpno;
2570
2.54M
    OPJ_UINT32 passtype;
2571
2.54M
    OPJ_INT32 nmsedec = 0;
2572
2.54M
    OPJ_INT32 max;
2573
2.54M
    OPJ_UINT32 i, j;
2574
2.54M
    OPJ_BYTE type = T1_TYPE_MQ;
2575
2.54M
    OPJ_FLOAT64 tempwmsedec;
2576
2.54M
    OPJ_INT32* datap;
2577
2578
#ifdef EXTRA_DEBUG
2579
    printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2580
           cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2581
#endif
2582
2583
2.54M
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2584
2585
2.54M
    max = 0;
2586
2.54M
    datap = t1->data;
2587
104M
    for (j = 0; j < t1->h; ++j) {
2588
102M
        const OPJ_UINT32 w = t1->w;
2589
4.83G
        for (i = 0; i < w; ++i, ++datap) {
2590
4.73G
            OPJ_INT32 tmp = *datap;
2591
4.73G
            if (tmp < 0) {
2592
319M
                OPJ_UINT32 tmp_unsigned;
2593
319M
                if (tmp == INT_MIN) {
2594
                    /* To avoid undefined behaviour when negating INT_MIN */
2595
                    /* but if we go here, it means we have supplied an input */
2596
                    /* with more bit depth than we we can really support. */
2597
                    /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
2598
179k
                    tmp = INT_MIN + 1;
2599
179k
                }
2600
319M
                max = opj_int_max(max, -tmp);
2601
319M
                tmp_unsigned = opj_to_smr(tmp);
2602
319M
                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2603
4.41G
            } else {
2604
4.41G
                max = opj_int_max(max, tmp);
2605
4.41G
            }
2606
4.73G
        }
2607
102M
    }
2608
2609
2.54M
    cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2610
1.57M
                                      T1_NMSEDEC_FRACBITS) : 0;
2611
2.54M
    if (cblk->numbps == 0) {
2612
1.58M
        cblk->totalpasses = 0;
2613
1.58M
        return cumwmsedec;
2614
1.58M
    }
2615
2616
959k
    bpno = (OPJ_INT32)(cblk->numbps - 1);
2617
959k
    passtype = 2;
2618
2619
959k
    opj_mqc_resetstates(mqc);
2620
959k
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2621
959k
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2622
959k
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2623
959k
    opj_mqc_init_enc(mqc, cblk->data);
2624
2625
27.7M
    for (passno = 0; bpno >= 0; ++passno) {
2626
26.8M
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2627
26.8M
        type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2628
26.8M
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2629
2630
        /* If the previous pass was terminating, we need to reset the encoder */
2631
26.8M
        if (passno > 0 && cblk->passes[passno - 1].term) {
2632
0
            if (type == T1_TYPE_RAW) {
2633
0
                opj_mqc_bypass_init_enc(mqc);
2634
0
            } else {
2635
0
                opj_mqc_restart_init_enc(mqc);
2636
0
            }
2637
0
        }
2638
2639
26.8M
        switch (passtype) {
2640
8.62M
        case 0:
2641
8.62M
            opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2642
8.62M
            break;
2643
8.62M
        case 1:
2644
8.62M
            opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2645
8.62M
            break;
2646
9.56M
        case 2:
2647
9.56M
            opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2648
            /* code switch SEGMARK (i.e. SEGSYM) */
2649
9.56M
            if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2650
0
                opj_mqc_segmark_enc(mqc);
2651
0
            }
2652
9.56M
            break;
2653
26.8M
        }
2654
2655
26.8M
        tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2656
26.8M
                                        stepsize, numcomps, mct_norms, mct_numcomps) ;
2657
26.8M
        cumwmsedec += tempwmsedec;
2658
26.8M
        pass->distortiondec = cumwmsedec;
2659
2660
26.8M
        if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2661
            /* If it is a terminated pass, terminate it */
2662
943k
            if (type == T1_TYPE_RAW) {
2663
0
                opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2664
943k
            } else {
2665
943k
                if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2666
0
                    opj_mqc_erterm_enc(mqc);
2667
943k
                } else {
2668
943k
                    opj_mqc_flush(mqc);
2669
943k
                }
2670
943k
            }
2671
943k
            pass->term = 1;
2672
943k
            pass->rate = opj_mqc_numbytes(mqc);
2673
25.8M
        } else {
2674
            /* Non terminated pass */
2675
25.8M
            OPJ_UINT32 rate_extra_bytes;
2676
25.8M
            if (type == T1_TYPE_RAW) {
2677
0
                rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2678
0
                                       mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2679
25.8M
            } else {
2680
25.8M
                rate_extra_bytes = 3;
2681
25.8M
            }
2682
25.8M
            pass->term = 0;
2683
25.8M
            pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2684
25.8M
        }
2685
2686
26.8M
        if (++passtype == 3) {
2687
9.56M
            passtype = 0;
2688
9.56M
            bpno--;
2689
9.56M
        }
2690
2691
        /* Code-switch "RESET" */
2692
26.8M
        if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2693
0
            opj_mqc_reset_enc(mqc);
2694
0
        }
2695
26.8M
    }
2696
2697
959k
    cblk->totalpasses = passno;
2698
2699
959k
    if (cblk->totalpasses) {
2700
        /* Make sure that pass rates are increasing */
2701
943k
        OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2702
27.7M
        for (passno = cblk->totalpasses; passno > 0;) {
2703
26.8M
            opj_tcd_pass_t *pass = &cblk->passes[--passno];
2704
26.8M
            if (pass->rate > last_pass_rate) {
2705
600k
                pass->rate = last_pass_rate;
2706
26.2M
            } else {
2707
26.2M
                last_pass_rate = pass->rate;
2708
26.2M
            }
2709
26.8M
        }
2710
943k
    }
2711
2712
27.7M
    for (passno = 0; passno < cblk->totalpasses; passno++) {
2713
26.8M
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2714
2715
        /* Prevent generation of FF as last data byte of a pass*/
2716
        /* For terminating passes, the flushing procedure ensured this already */
2717
26.8M
        assert(pass->rate > 0);
2718
26.8M
        if (cblk->data[pass->rate - 1] == 0xFF) {
2719
240k
            pass->rate--;
2720
240k
        }
2721
26.8M
        pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2722
26.8M
    }
2723
2724
#ifdef EXTRA_DEBUG
2725
    printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2726
2727
    /* Check that there not 0xff >=0x90 sequences */
2728
    if (cblk->totalpasses) {
2729
        OPJ_UINT32 i;
2730
        OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2731
        for (i = 1; i < len; ++i) {
2732
            if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2733
                printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2734
                abort();
2735
            }
2736
        }
2737
    }
2738
#endif
2739
2740
959k
    return cumwmsedec;
2741
959k
}