Coverage Report

Created: 2025-07-23 07:06

/src/opencv/3rdparty/openjpeg/openjp2/t1.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * The copyright in this software is being made available under the 2-clauses
3
 * BSD License, included below. This software may be subject to other third
4
 * party and contributor rights, including patent rights, and no such rights
5
 * are granted under this license.
6
 *
7
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8
 * Copyright (c) 2002-2014, Professor Benoit Macq
9
 * Copyright (c) 2001-2003, David Janssens
10
 * Copyright (c) 2002-2003, Yannick Verschueren
11
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
12
 * Copyright (c) 2003-2014, Antonin Descampe
13
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
14
 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15
 * Copyright (c) 2012, Carl Hetherington
16
 * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17
 * All rights reserved.
18
 *
19
 * Redistribution and use in source and binary forms, with or without
20
 * modification, are permitted provided that the following conditions
21
 * are met:
22
 * 1. Redistributions of source code must retain the above copyright
23
 *    notice, this list of conditions and the following disclaimer.
24
 * 2. Redistributions in binary form must reproduce the above copyright
25
 *    notice, this list of conditions and the following disclaimer in the
26
 *    documentation and/or other materials provided with the distribution.
27
 *
28
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
 * POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
#define OPJ_SKIP_POISON
42
#include "opj_includes.h"
43
44
#ifdef __SSE__
45
#include <xmmintrin.h>
46
#endif
47
#ifdef __SSE2__
48
#include <emmintrin.h>
49
#endif
50
#if (defined(__AVX2__) || defined(__AVX512F__))
51
#include <immintrin.h>
52
#endif
53
54
#if defined(__GNUC__)
55
#pragma GCC poison malloc calloc realloc free
56
#endif
57
58
#include "t1_luts.h"
59
60
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
61
/*@{*/
62
63
74.3k
/* Flags word for sample column x, row y. Each flags word covers 4 consecutive
 * rows (hence y / 4) and flag rows are (t1->w + 2) words wide; the +1 offsets
 * skip the leading word/row. Expands a bare `t1`, which must be in scope at
 * the call site. Arguments are parenthesized so expressions index correctly. */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
64
65
2.28G
/* Make curctx point at entry ctxno of mqc->ctxs. Expands a bare `mqc`, which
 * must be in scope at the call site. The whole expansion is parenthesized so
 * it behaves as a single expression wherever it is used. */
#define opj_t1_setcurctx(curctx, ctxno)  ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)])
66
67
/* Macros to deal with signed integer with just MSB bit set for
68
 * negative values (smr = signed magnitude representation) */
69
0
/* Magnitude of a sign-magnitude value: every bit except the MSB. */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
/* Sign of a sign-magnitude value: 1 when the MSB is set, 0 otherwise. */
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
/* Convert a two's-complement integer to sign-magnitude form. The negation is
 * performed in unsigned arithmetic on the parenthesized argument, so compound
 * expressions are negated as a whole and the most negative value does not
 * trigger signed overflow. */
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((0U - (OPJ_UINT32)(x)) | 0x80000000U))
72
73
74
/** @name Local static functions */
75
/*@{*/
76
77
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
78
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
79
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
80
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
81
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
82
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
83
                                       OPJ_UINT32 vsc);
84
85
86
/**
87
Decode significant pass
88
*/
89
90
static INLINE void opj_t1_dec_sigpass_step_raw(
91
    opj_t1_t *t1,
92
    opj_flag_t *flagsp,
93
    OPJ_INT32 *datap,
94
    OPJ_INT32 oneplushalf,
95
    OPJ_UINT32 vsc,
96
    OPJ_UINT32 row);
97
static INLINE void opj_t1_dec_sigpass_step_mqc(
98
    opj_t1_t *t1,
99
    opj_flag_t *flagsp,
100
    OPJ_INT32 *datap,
101
    OPJ_INT32 oneplushalf,
102
    OPJ_UINT32 row,
103
    OPJ_UINT32 flags_stride,
104
    OPJ_UINT32 vsc);
105
106
/**
107
Encode significant pass
108
*/
109
static void opj_t1_enc_sigpass(opj_t1_t *t1,
110
                               OPJ_INT32 bpno,
111
                               OPJ_INT32 *nmsedec,
112
                               OPJ_BYTE type,
113
                               OPJ_UINT32 cblksty);
114
115
/**
116
Decode significant pass
117
*/
118
static void opj_t1_dec_sigpass_raw(
119
    opj_t1_t *t1,
120
    OPJ_INT32 bpno,
121
    OPJ_INT32 cblksty);
122
123
/**
124
Encode refinement pass
125
*/
126
static void opj_t1_enc_refpass(opj_t1_t *t1,
127
                               OPJ_INT32 bpno,
128
                               OPJ_INT32 *nmsedec,
129
                               OPJ_BYTE type);
130
131
/**
132
Decode refinement pass
133
*/
134
static void opj_t1_dec_refpass_raw(
135
    opj_t1_t *t1,
136
    OPJ_INT32 bpno);
137
138
139
/**
140
Decode refinement pass
141
*/
142
143
static INLINE void  opj_t1_dec_refpass_step_raw(
144
    opj_t1_t *t1,
145
    opj_flag_t *flagsp,
146
    OPJ_INT32 *datap,
147
    OPJ_INT32 poshalf,
148
    OPJ_UINT32 row);
149
static INLINE void opj_t1_dec_refpass_step_mqc(
150
    opj_t1_t *t1,
151
    opj_flag_t *flagsp,
152
    OPJ_INT32 *datap,
153
    OPJ_INT32 poshalf,
154
    OPJ_UINT32 row);
155
156
157
/**
158
Decode clean-up pass
159
*/
160
161
static void opj_t1_dec_clnpass_step(
162
    opj_t1_t *t1,
163
    opj_flag_t *flagsp,
164
    OPJ_INT32 *datap,
165
    OPJ_INT32 oneplushalf,
166
    OPJ_UINT32 row,
167
    OPJ_UINT32 vsc);
168
169
/**
170
Encode clean-up pass
171
*/
172
static void opj_t1_enc_clnpass(
173
    opj_t1_t *t1,
174
    OPJ_INT32 bpno,
175
    OPJ_INT32 *nmsedec,
176
    OPJ_UINT32 cblksty);
177
178
static OPJ_FLOAT64 opj_t1_getwmsedec(
179
    OPJ_INT32 nmsedec,
180
    OPJ_UINT32 compno,
181
    OPJ_UINT32 level,
182
    OPJ_UINT32 orient,
183
    OPJ_INT32 bpno,
184
    OPJ_UINT32 qmfbid,
185
    OPJ_FLOAT64 stepsize,
186
    OPJ_UINT32 numcomps,
187
    const OPJ_FLOAT64 * mct_norms,
188
    OPJ_UINT32 mct_numcomps);
189
190
/** Return "cumwmsedec" that should be used to increase tile->distotile */
191
static double opj_t1_encode_cblk(opj_t1_t *t1,
192
                                 opj_tcd_cblk_enc_t* cblk,
193
                                 OPJ_UINT32 orient,
194
                                 OPJ_UINT32 compno,
195
                                 OPJ_UINT32 level,
196
                                 OPJ_UINT32 qmfbid,
197
                                 OPJ_FLOAT64 stepsize,
198
                                 OPJ_UINT32 cblksty,
199
                                 OPJ_UINT32 numcomps,
200
                                 const OPJ_FLOAT64 * mct_norms,
201
                                 OPJ_UINT32 mct_numcomps);
202
203
/**
204
Decode 1 code-block
205
@param t1 T1 handle
206
@param cblk Code-block coding parameters
207
@param orient
208
@param roishift Region of interest shifting value
209
@param cblksty Code-block style
210
@param p_manager the event manager
211
@param p_manager_mutex mutex for the event manager
212
@param check_pterm whether PTERM correct termination should be checked
213
*/
214
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
215
                                   opj_tcd_cblk_dec_t* cblk,
216
                                   OPJ_UINT32 orient,
217
                                   OPJ_UINT32 roishift,
218
                                   OPJ_UINT32 cblksty,
219
                                   opj_event_mgr_t *p_manager,
220
                                   opj_mutex_t* p_manager_mutex,
221
                                   OPJ_BOOL check_pterm);
222
223
/**
224
Decode 1 HT code-block
225
@param t1 T1 handle
226
@param cblk Code-block coding parameters
227
@param orient
228
@param roishift Region of interest shifting value
229
@param cblksty Code-block style
230
@param p_manager the event manager
231
@param p_manager_mutex mutex for the event manager
232
@param check_pterm whether PTERM correct termination should be checked
233
*/
234
OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
235
                               opj_tcd_cblk_dec_t* cblk,
236
                               OPJ_UINT32 orient,
237
                               OPJ_UINT32 roishift,
238
                               OPJ_UINT32 cblksty,
239
                               opj_event_mgr_t *p_manager,
240
                               opj_mutex_t* p_manager_mutex,
241
                               OPJ_BOOL check_pterm);
242
243
244
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
245
                                        OPJ_UINT32 w,
246
                                        OPJ_UINT32 h);
247
248
/*@}*/
249
250
/*@}*/
251
252
/* ----------------------------------------------------------------------- */
253
254
/**
Look up the zero-coding context number for a sample.
@param mqc coder whose lut_ctxno_zc_orient table is consulted
@param f   flags word already shifted so the sample of interest is sample 0
@return table entry selected by the neighbour-significance bits of f
*/
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
{
    const OPJ_UINT32 neighbour_bits = f & T1_SIGMA_NEIGHBOURS;

    return mqc->lut_ctxno_zc_orient[neighbour_bits];
}
258
259
/**
Build the 8-bit index used by the sign-coding context and sign-prediction
lookup tables (lut_ctxno_sc / lut_spb).
@param fX  flags word of the current column
@param pfX flags word of the previous column
@param nfX flags word of the next column
@param ci  index (0..3) of the sample inside the flags word
@return index whose bits are laid out as follows:
      bit source               meaning
      0   pfX T1_CHI_THIS      T1_LUT_SGN_W
      1   fX  T1_SIGMA_1       T1_LUT_SIG_N
      2   nfX T1_CHI_THIS      T1_LUT_SGN_E
      3   fX  T1_SIGMA_3       T1_LUT_SIG_W
      4   fX  T1_CHI_(THIS-1)  T1_LUT_SGN_N
      5   fX  T1_SIGMA_5       T1_LUT_SIG_E
      6   fX  T1_CHI_(THIS+1)  T1_LUT_SGN_S
      7   fX  T1_SIGMA_7       T1_LUT_SIG_S
*/
static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
        OPJ_UINT32 pfX,
        OPJ_UINT32 nfX,
        OPJ_UINT32 ci)
{
    /* Each sample owns 3 bits of the per-sample bit groups. */
    const OPJ_UINT32 sample_shift = ci * 3U;
    /* The sign bit of the sample above sits at T1_CHI_0_I for the first
       sample and follows the T1_CHI_1_I series for the others. */
    const OPJ_UINT32 north_sign_shift = (ci == 0U)
                                        ? (T1_CHI_0_I - 4U)
                                        : (T1_CHI_1_I - 4U + ((ci - 1U) * 3U));
    OPJ_UINT32 index;

    /* bits 1, 3, 5, 7: significance of N, W, E, S neighbours */
    index = (fX >> sample_shift) &
            (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | T1_SIGMA_7);
    /* bit 0: sign of the west neighbour */
    index |= (pfX >> (T1_CHI_THIS_I + sample_shift)) & (1U << 0);
    /* bit 2: sign of the east neighbour */
    index |= (nfX >> (T1_CHI_THIS_I - 2U + sample_shift)) & (1U << 2);
    /* bit 4: sign of the north neighbour */
    index |= (fX >> north_sign_shift) & (1U << 4);
    /* bit 6: sign of the south neighbour */
    index |= (fX >> (T1_CHI_2_I - 6U + sample_shift)) & (1U << 6);

    return index;
}
288
289
/**
Sign-coding context number for a given lookup index.
@param lu index produced by opj_t1_getctxtno_sc_or_spb_index()
@return entry lu of lut_ctxno_sc
*/
static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
{
    const OPJ_BYTE ctxno = lut_ctxno_sc[lu];

    return ctxno;
}
293
294
/**
Magnitude-refinement context number for a sample.
@param f flags word already shifted so the sample of interest is sample 0
@return T1_CTXNO_MAG + 2 when T1_MU_0 is set in f; otherwise
        T1_CTXNO_MAG + 1 when any neighbour-significance bit is set,
        else T1_CTXNO_MAG
*/
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
{
    if (f & T1_MU_0) {
        return T1_CTXNO_MAG + 2;
    }
    if (f & T1_SIGMA_NEIGHBOURS) {
        return T1_CTXNO_MAG + 1;
    }
    return T1_CTXNO_MAG;
}
300
301
/**
Sign-prediction bit for a given lookup index.
@param lu index produced by opj_t1_getctxtno_sc_or_spb_index()
@return entry lu of lut_spb
*/
static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
{
    const OPJ_BYTE spb = lut_spb[lu];

    return spb;
}
305
306
/**
Table lookup used to accumulate `nmsedec` when a sample becomes significant.
@param x      magnitude of the sample
@param bitpos bit position; 0 selects the dedicated lut_nmsedec_sig0 table
@return lut_nmsedec_sig[] entry for the T1_NMSEDEC_BITS bits of x starting at
        bitpos, or the lut_nmsedec_sig0[] entry for the low bits when
        bitpos is 0
*/
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
    const OPJ_UINT32 index_mask = (1 << T1_NMSEDEC_BITS) - 1;

    return (bitpos > 0)
           ? lut_nmsedec_sig[(x >> (bitpos)) & index_mask]
           : lut_nmsedec_sig0[x & index_mask];
}
314
315
/**
Table lookup used to accumulate `nmsedec` when a sample is refined.
@param x      magnitude of the sample
@param bitpos bit position; 0 selects the dedicated lut_nmsedec_ref0 table
@return lut_nmsedec_ref[] entry for the T1_NMSEDEC_BITS bits of x starting at
        bitpos, or the lut_nmsedec_ref0[] entry for the low bits when
        bitpos is 0
*/
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
    const OPJ_UINT32 index_mask = (1 << T1_NMSEDEC_BITS) - 1;

    return (bitpos > 0)
           ? lut_nmsedec_ref[(x >> (bitpos)) & index_mask]
           : lut_nmsedec_ref0[x & index_mask];
}
323
324
368M
/**
Record that sample `ci` of the flags word at `flagsp` has become significant
with sign bit `s`, and tell its neighbours about it.
@param flags  lvalue holding the current word (either *flagsp or a cached copy
              the caller writes back later)
@param flagsp pointer to the current word; neighbour words are updated
              through it
@param ci     index (0..3) of the sample inside the word
@param s      sign bit of the sample (0 or 1)
@param stride distance, in words, between two consecutive flag rows
@param vsc    when non-zero, the row above is left untouched
Every argument is parenthesized in the expansion so expressions can be passed
safely.
*/
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* east */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* west */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* north-west, north, north-east: only the first sample of a word */ \
    /* touches the row of words above, and never in vsc mode */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* south-west, south, south-east: only the last sample of a word */ \
    /* touches the row of words below */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
351
352
353
/**
Function form of opj_t1_update_flags_macro() operating directly on *flagsp.
@param flagsp pointer to the flags word holding the sample
@param ci     index (0..3) of the sample inside the word
@param s      sign bit of the sample
@param stride distance, in words, between two consecutive flag rows
@param vsc    when non-zero, the row above is left untouched
*/
static INLINE void opj_t1_update_flags(
    opj_flag_t *flagsp,
    OPJ_UINT32 ci,
    OPJ_UINT32 s,
    OPJ_UINT32 stride,
    OPJ_UINT32 vsc)
{
    /* The word being updated and the word pointed to are the same here. */
    opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
}
359
360
/**
361
Encode significant pass
362
*/
363
0
/* Encode one sample of the significance pass.
 * mqc/curctx/a/c/ct are the coder and its cached state variables, flagspIn
 * and datapIn point at the flags word and the sample, ciIn is the sample
 * index (0..3) inside the flags word and vscIn the "do not touch the row
 * above" switch. `type` selects raw (bypass) or MQ coding and *nmsedec is
 * incremented for every sample that becomes significant.
 * Expands a bare `t1`, which must be in scope at the call site. */
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
{ \
    OPJ_UINT32 v; \
    const OPJ_UINT32 ci = (ciIn); \
    const OPJ_UINT32 vsc = (vscIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    opj_flag_t* flagsp = (flagspIn); \
    OPJ_UINT32 const flags = *flagsp; \
    /* Candidate samples: SIGMA and PI bits both clear, and at least one */ \
    /* neighbour-significance bit set. */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        /* v: magnitude bit selected by `one` */ \
        v = ((opj_smr_abs(*l_datap) & (OPJ_UINT32)one) != 0U); \
        opj_t1_setcurctx(curctx, ctxt1); \
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        if (v) { \
            /* The sample becomes significant: code its sign as well. */ \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                *flagsp, \
                                flagsp[-1], flagsp[1], \
                                ci); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            v = opj_smr_sign(*l_datap); \
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                              (OPJ_UINT32)bpno); \
            opj_t1_setcurctx(curctx, ctxt2); \
            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
            } else { \
                /* MQ mode codes the sign XORed with its prediction */ \
                OPJ_UINT32 spb = opj_t1_getspb(lu); \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
            } \
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
        } \
        /* Mark the sample as handled by this pass. */ \
        *flagsp |= T1_PI_THIS << (ci * 3U); \
    } \
}
411
412
static INLINE void opj_t1_dec_sigpass_step_raw(
413
    opj_t1_t *t1,
414
    opj_flag_t *flagsp,
415
    OPJ_INT32 *datap,
416
    OPJ_INT32 oneplushalf,
417
    OPJ_UINT32 vsc,
418
    OPJ_UINT32 ci)
419
48.6M
{
420
48.6M
    OPJ_UINT32 v;
421
48.6M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
422
423
48.6M
    OPJ_UINT32 const flags = *flagsp;
424
425
48.6M
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
426
48.6M
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
427
4.31M
        if (opj_mqc_raw_decode(mqc)) {
428
3.40M
            v = opj_mqc_raw_decode(mqc);
429
3.40M
            *datap = v ? -oneplushalf : oneplushalf;
430
3.40M
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
431
3.40M
        }
432
4.31M
        *flagsp |= T1_PI_THIS << (ci * 3U);
433
4.31M
    }
434
48.6M
}
435
436
/* Decode one sample of the significance pass with the MQ coder.
 * `flags` is an lvalue holding the current flags word (it may be a cached
 * copy: only neighbour words are written through `flagsp`, so the caller is
 * responsible for storing `flags` back). The sample written is
 * data[ci * data_stride]; `v` is a scratch variable supplied by the caller
 * and mqc/curctx/a/c/ct are the coder and its cached state variables. */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    /* Candidate samples: SIGMA and PI bits both clear, and at least one */ \
    /* neighbour-significance bit set. */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            /* Newly significant: decode the sign, undo its prediction. */ \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                flags, \
                                flagsp[-1], flagsp[1], \
                                ci); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v ^= spb; \
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        /* Mark the sample as handled by this pass. */ \
        flags |= T1_PI_THIS << (ci * 3U); \
    } \
}
461
462
static INLINE void opj_t1_dec_sigpass_step_mqc(
463
    opj_t1_t *t1,
464
    opj_flag_t *flagsp,
465
    OPJ_INT32 *datap,
466
    OPJ_INT32 oneplushalf,
467
    OPJ_UINT32 ci,
468
    OPJ_UINT32 flags_stride,
469
    OPJ_UINT32 vsc)
470
19.2M
{
471
19.2M
    OPJ_UINT32 v;
472
473
19.2M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
474
19.2M
    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
475
19.2M
                                      0, ci, mqc, mqc->curctx,
476
19.2M
                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
477
19.2M
}
478
479
static void opj_t1_enc_sigpass(opj_t1_t *t1,
480
                               OPJ_INT32 bpno,
481
                               OPJ_INT32 *nmsedec,
482
                               OPJ_BYTE type,
483
                               OPJ_UINT32 cblksty
484
                              )
485
0
{
486
0
    OPJ_UINT32 i, k;
487
0
    OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
488
0
    opj_flag_t* f = &T1_FLAGS(0, 0);
489
0
    OPJ_UINT32 const extra = 2;
490
0
    opj_mqc_t* mqc = &(t1->mqc);
491
0
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
492
0
    const OPJ_INT32* datap = t1->data;
493
494
0
    *nmsedec = 0;
495
#ifdef DEBUG_ENC_SIG
496
    fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
497
#endif
498
0
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
499
0
        const OPJ_UINT32 w = t1->w;
500
#ifdef DEBUG_ENC_SIG
501
        fprintf(stderr, " k=%d\n", k);
502
#endif
503
0
        for (i = 0; i < w; ++i, ++f, datap += 4) {
504
#ifdef DEBUG_ENC_SIG
505
            fprintf(stderr, " i=%d\n", i);
506
#endif
507
0
            if (*f == 0U) {
508
                /* Nothing to do for any of the 4 data points */
509
0
                continue;
510
0
            }
511
0
            opj_t1_enc_sigpass_step_macro(
512
0
                mqc, curctx, a, c, ct,
513
0
                f,
514
0
                &datap[0],
515
0
                bpno,
516
0
                one,
517
0
                nmsedec,
518
0
                type,
519
0
                0, cblksty & J2K_CCP_CBLKSTY_VSC);
520
0
            opj_t1_enc_sigpass_step_macro(
521
0
                mqc, curctx, a, c, ct,
522
0
                f,
523
0
                &datap[1],
524
0
                bpno,
525
0
                one,
526
0
                nmsedec,
527
0
                type,
528
0
                1, 0);
529
0
            opj_t1_enc_sigpass_step_macro(
530
0
                mqc, curctx, a, c, ct,
531
0
                f,
532
0
                &datap[2],
533
0
                bpno,
534
0
                one,
535
0
                nmsedec,
536
0
                type,
537
0
                2, 0);
538
0
            opj_t1_enc_sigpass_step_macro(
539
0
                mqc, curctx, a, c, ct,
540
0
                f,
541
0
                &datap[3],
542
0
                bpno,
543
0
                one,
544
0
                nmsedec,
545
0
                type,
546
0
                3, 0);
547
0
        }
548
0
    }
549
550
0
    if (k < t1->h) {
551
0
        OPJ_UINT32 j;
552
#ifdef DEBUG_ENC_SIG
553
        fprintf(stderr, " k=%d\n", k);
554
#endif
555
0
        for (i = 0; i < t1->w; ++i, ++f) {
556
#ifdef DEBUG_ENC_SIG
557
            fprintf(stderr, " i=%d\n", i);
558
#endif
559
0
            if (*f == 0U) {
560
                /* Nothing to do for any of the 4 data points */
561
0
                datap += (t1->h - k);
562
0
                continue;
563
0
            }
564
0
            for (j = k; j < t1->h; ++j, ++datap) {
565
0
                opj_t1_enc_sigpass_step_macro(
566
0
                    mqc, curctx, a, c, ct,
567
0
                    f,
568
0
                    &datap[0],
569
0
                    bpno,
570
0
                    one,
571
0
                    nmsedec,
572
0
                    type,
573
0
                    j - k,
574
0
                    (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
575
0
            }
576
0
        }
577
0
    }
578
579
0
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
580
0
}
581
582
static void opj_t1_dec_sigpass_raw(
583
    opj_t1_t *t1,
584
    OPJ_INT32 bpno,
585
    OPJ_INT32 cblksty)
586
37.9k
{
587
37.9k
    OPJ_INT32 one, half, oneplushalf;
588
37.9k
    OPJ_UINT32 i, j, k;
589
37.9k
    OPJ_INT32 *data = t1->data;
590
37.9k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
591
37.9k
    const OPJ_UINT32 l_w = t1->w;
592
37.9k
    one = 1 << bpno;
593
37.9k
    half = one >> 1;
594
37.9k
    oneplushalf = one | half;
595
596
325k
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
597
14.0M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
598
13.7M
            opj_flag_t flags = *flagsp;
599
13.7M
            if (flags != 0) {
600
11.4M
                opj_t1_dec_sigpass_step_raw(
601
11.4M
                    t1,
602
11.4M
                    flagsp,
603
11.4M
                    data,
604
11.4M
                    oneplushalf,
605
11.4M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
606
11.4M
                    0U);
607
11.4M
                opj_t1_dec_sigpass_step_raw(
608
11.4M
                    t1,
609
11.4M
                    flagsp,
610
11.4M
                    data + l_w,
611
11.4M
                    oneplushalf,
612
11.4M
                    OPJ_FALSE, /* vsc */
613
11.4M
                    1U);
614
11.4M
                opj_t1_dec_sigpass_step_raw(
615
11.4M
                    t1,
616
11.4M
                    flagsp,
617
11.4M
                    data + 2 * l_w,
618
11.4M
                    oneplushalf,
619
11.4M
                    OPJ_FALSE, /* vsc */
620
11.4M
                    2U);
621
11.4M
                opj_t1_dec_sigpass_step_raw(
622
11.4M
                    t1,
623
11.4M
                    flagsp,
624
11.4M
                    data + 3 * l_w,
625
11.4M
                    oneplushalf,
626
11.4M
                    OPJ_FALSE, /* vsc */
627
11.4M
                    3U);
628
11.4M
            }
629
13.7M
        }
630
287k
    }
631
37.9k
    if (k < t1->h) {
632
1.07M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
633
4.03M
            for (j = 0; j < t1->h - k; ++j) {
634
2.95M
                opj_t1_dec_sigpass_step_raw(
635
2.95M
                    t1,
636
2.95M
                    flagsp,
637
2.95M
                    data + j * l_w,
638
2.95M
                    oneplushalf,
639
2.95M
                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
640
2.95M
                    j);
641
2.95M
            }
642
1.07M
        }
643
2.37k
    }
644
37.9k
}
645
646
815k
/* Body shared by the MQ-coded significance pass decoders.
 * w, h and flags_stride may be compile-time constants (64x64 variants) or
 * run-time expressions (generic variants); vsc is OPJ_TRUE or OPJ_FALSE.
 * Full stripes of 4 rows work on cached coder variables and a cached copy of
 * each flags word; the trailing partial stripe goes through
 * opj_t1_dec_sigpass_step_mqc(), which uses the state stored in t1->mqc,
 * hence the UPLOAD_MQC_VARIABLES in between. */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        OPJ_INT32 *data = t1->data; \
        opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = w; \
        opj_mqc_t* mqc = &(t1->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        OPJ_UINT32 v; \
        one = 1 << bpno; \
        half = one >> 1; \
        oneplushalf = one | half; \
        /* full stripes */ \
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if (flags != 0) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            /* store the cached word back */ \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        /* last, partial stripe */ \
        if (k < h) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < h - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
            } \
        } \
}
689
690
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
691
    opj_t1_t *t1,
692
    OPJ_INT32 bpno)
693
89.8k
{
694
89.8k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
695
89.8k
}
696
697
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
698
    opj_t1_t *t1,
699
    OPJ_INT32 bpno)
700
159k
{
701
159k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
702
159k
}
703
704
static void opj_t1_dec_sigpass_mqc_generic_novsc(
705
    opj_t1_t *t1,
706
    OPJ_INT32 bpno)
707
201k
{
708
201k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
709
201k
                                    t1->w + 2U);
710
201k
}
711
712
static void opj_t1_dec_sigpass_mqc_generic_vsc(
713
    opj_t1_t *t1,
714
    OPJ_INT32 bpno)
715
364k
{
716
364k
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
717
364k
                                    t1->w + 2U);
718
364k
}
719
720
static void opj_t1_dec_sigpass_mqc(
721
    opj_t1_t *t1,
722
    OPJ_INT32 bpno,
723
    OPJ_INT32 cblksty)
724
815k
{
725
815k
    if (t1->w == 64 && t1->h == 64) {
726
249k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
727
159k
            opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
728
159k
        } else {
729
89.8k
            opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
730
89.8k
        }
731
565k
    } else {
732
565k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
733
364k
            opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
734
364k
        } else {
735
201k
            opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
736
201k
        }
737
565k
    }
738
815k
}
739
740
/**
741
Encode refinement pass step
742
*/
743
0
/* Encode one sample of the refinement pass.
 * `flags` is the flags word as it was on entry, `flagsUpdated` an lvalue that
 * receives the T1_MU_THIS bit of the sample, `datap` points at the sample and
 * `ci` is its index (0..3) inside the flags word. `type` selects raw (bypass)
 * or MQ coding and *nmsedec is incremented for every refined sample. */
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
{\
    OPJ_UINT32 v; \
    /* Refine samples whose SIGMA bit is set and whose PI bit is clear. */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
        const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
        OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
        *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
                                          (OPJ_UINT32)bpno); \
        /* v: magnitude bit selected by `one` */ \
        v = (((OPJ_INT32)abs_data & one) != 0); \
        opj_t1_setcurctx(curctx, ctxt); \
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
765
766
767
static INLINE void opj_t1_dec_refpass_step_raw(
768
    opj_t1_t *t1,
769
    opj_flag_t *flagsp,
770
    OPJ_INT32 *datap,
771
    OPJ_INT32 poshalf,
772
    OPJ_UINT32 ci)
773
43.5M
{
774
43.5M
    OPJ_UINT32 v;
775
776
43.5M
    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
777
778
43.5M
    if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
779
43.5M
            (T1_SIGMA_THIS << (ci * 3U))) {
780
38.9M
        v = opj_mqc_raw_decode(mqc);
781
38.9M
        *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
782
38.9M
        *flagsp |= T1_MU_THIS << (ci * 3U);
783
38.9M
    }
784
43.5M
}
785
786
/* Decode one sample of the refinement pass with the MQ coder.
 * `flags` is an lvalue holding the current flags word and receives the
 * T1_MU_THIS bit of the sample; the sample refined is data[ci * data_stride].
 * `v` is a scratch variable supplied by the caller and mqc/curctx/a/c/ct are
 * the coder and its cached state variables. */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    /* Refine samples whose SIGMA bit is set and whose PI bit is clear. */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
            (T1_SIGMA_THIS << (ci * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        /* decoded bit XOR "sample is negative" picks the direction */ \
        data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
        flags |= T1_MU_THIS << (ci * 3U); \
    } \
}
798
799
static INLINE void opj_t1_dec_refpass_step_mqc(
800
    opj_t1_t *t1,
801
    opj_flag_t *flagsp,
802
    OPJ_INT32 *datap,
803
    OPJ_INT32 poshalf,
804
    OPJ_UINT32 ci)
805
17.0M
{
806
17.0M
    OPJ_UINT32 v;
807
808
17.0M
    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
809
17.0M
    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
810
17.0M
                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
811
17.0M
                                      mqc->ct, poshalf);
812
17.0M
}
813
814
static void opj_t1_enc_refpass(
815
    opj_t1_t *t1,
816
    OPJ_INT32 bpno,
817
    OPJ_INT32 *nmsedec,
818
    OPJ_BYTE type)
819
0
{
820
0
    OPJ_UINT32 i, k;
821
0
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
822
0
    opj_flag_t* f = &T1_FLAGS(0, 0);
823
0
    const OPJ_UINT32 extra = 2U;
824
0
    opj_mqc_t* mqc = &(t1->mqc);
825
0
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
826
0
    const OPJ_INT32* datap = t1->data;
827
828
0
    *nmsedec = 0;
829
#ifdef DEBUG_ENC_REF
830
    fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
831
#endif
832
0
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
833
#ifdef DEBUG_ENC_REF
834
        fprintf(stderr, " k=%d\n", k);
835
#endif
836
0
        for (i = 0; i < t1->w; ++i, f++, datap += 4) {
837
0
            const OPJ_UINT32 flags = *f;
838
0
            OPJ_UINT32 flagsUpdated = flags;
839
#ifdef DEBUG_ENC_REF
840
            fprintf(stderr, " i=%d\n", i);
841
#endif
842
0
            if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
843
                /* none significant */
844
0
                continue;
845
0
            }
846
0
            if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
847
0
                    (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
848
                /* all processed by sigpass */
849
0
                continue;
850
0
            }
851
852
0
            opj_t1_enc_refpass_step_macro(
853
0
                mqc, curctx, a, c, ct,
854
0
                flags, flagsUpdated,
855
0
                &datap[0],
856
0
                bpno,
857
0
                one,
858
0
                nmsedec,
859
0
                type,
860
0
                0);
861
0
            opj_t1_enc_refpass_step_macro(
862
0
                mqc, curctx, a, c, ct,
863
0
                flags, flagsUpdated,
864
0
                &datap[1],
865
0
                bpno,
866
0
                one,
867
0
                nmsedec,
868
0
                type,
869
0
                1);
870
0
            opj_t1_enc_refpass_step_macro(
871
0
                mqc, curctx, a, c, ct,
872
0
                flags, flagsUpdated,
873
0
                &datap[2],
874
0
                bpno,
875
0
                one,
876
0
                nmsedec,
877
0
                type,
878
0
                2);
879
0
            opj_t1_enc_refpass_step_macro(
880
0
                mqc, curctx, a, c, ct,
881
0
                flags, flagsUpdated,
882
0
                &datap[3],
883
0
                bpno,
884
0
                one,
885
0
                nmsedec,
886
0
                type,
887
0
                3);
888
0
            *f = flagsUpdated;
889
0
        }
890
0
    }
891
892
0
    if (k < t1->h) {
893
0
        OPJ_UINT32 j;
894
0
        const OPJ_UINT32 remaining_lines = t1->h - k;
895
#ifdef DEBUG_ENC_REF
896
        fprintf(stderr, " k=%d\n", k);
897
#endif
898
0
        for (i = 0; i < t1->w; ++i, ++f) {
899
#ifdef DEBUG_ENC_REF
900
            fprintf(stderr, " i=%d\n", i);
901
#endif
902
0
            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
903
                /* none significant */
904
0
                datap += remaining_lines;
905
0
                continue;
906
0
            }
907
0
            for (j = 0; j < remaining_lines; ++j, datap ++) {
908
0
                opj_t1_enc_refpass_step_macro(
909
0
                    mqc, curctx, a, c, ct,
910
0
                    *f, *f,
911
0
                    &datap[0],
912
0
                    bpno,
913
0
                    one,
914
0
                    nmsedec,
915
0
                    type,
916
0
                    j);
917
0
            }
918
0
        }
919
0
    }
920
921
0
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
922
0
}
923
924
925
static void opj_t1_dec_refpass_raw(
926
    opj_t1_t *t1,
927
    OPJ_INT32 bpno)
928
36.3k
{
929
36.3k
    OPJ_INT32 one, poshalf;
930
36.3k
    OPJ_UINT32 i, j, k;
931
36.3k
    OPJ_INT32 *data = t1->data;
932
36.3k
    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
933
36.3k
    const OPJ_UINT32 l_w = t1->w;
934
36.3k
    one = 1 << bpno;
935
36.3k
    poshalf = one >> 1;
936
321k
    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
937
12.6M
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
938
12.3M
            opj_flag_t flags = *flagsp;
939
12.3M
            if (flags != 0) {
940
10.2M
                opj_t1_dec_refpass_step_raw(
941
10.2M
                    t1,
942
10.2M
                    flagsp,
943
10.2M
                    data,
944
10.2M
                    poshalf,
945
10.2M
                    0U);
946
10.2M
                opj_t1_dec_refpass_step_raw(
947
10.2M
                    t1,
948
10.2M
                    flagsp,
949
10.2M
                    data + l_w,
950
10.2M
                    poshalf,
951
10.2M
                    1U);
952
10.2M
                opj_t1_dec_refpass_step_raw(
953
10.2M
                    t1,
954
10.2M
                    flagsp,
955
10.2M
                    data + 2 * l_w,
956
10.2M
                    poshalf,
957
10.2M
                    2U);
958
10.2M
                opj_t1_dec_refpass_step_raw(
959
10.2M
                    t1,
960
10.2M
                    flagsp,
961
10.2M
                    data + 3 * l_w,
962
10.2M
                    poshalf,
963
10.2M
                    3U);
964
10.2M
            }
965
12.3M
        }
966
284k
    }
967
36.3k
    if (k < t1->h) {
968
986k
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
969
3.68M
            for (j = 0; j < t1->h - k; ++j) {
970
2.70M
                opj_t1_dec_refpass_step_raw(
971
2.70M
                    t1,
972
2.70M
                    flagsp,
973
2.70M
                    data + j * l_w,
974
2.70M
                    poshalf,
975
2.70M
                    j);
976
2.70M
            }
977
984k
        }
978
2.28k
    }
979
36.3k
}
980
981
761k
/**
Body of the MQ-coded decode refinement pass.
w, h and flags_stride are macro arguments so that specializations with
constant dimensions can be generated. Full stripes work on local copies of
the MQ state; the state is written back before the last, incomplete stripe,
which goes through opj_t1_dec_refpass_step_mqc().
*/
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
    OPJ_UINT32 i, j, k; \
    const OPJ_INT32 poshalf = (1 << bpno) >> 1; \
    const OPJ_UINT32 l_w = w; \
    register OPJ_INT32 *data = t1->data; \
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
    opj_mqc_t* mqc = &(t1->mqc); \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    register OPJ_UINT32 v; \
    /* Full stripes of 4 rows */ \
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags == 0) { \
                /* nothing to refine in this column */ \
                continue; \
            } \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 0, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 1, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 2, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 3, \
                mqc, curctx, v, a, c, ct, poshalf); \
            *flagsp = flags; \
        } \
    } \
    /* Write the MQ state back: the code below reads it from t1->mqc */ \
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    if (k < h) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            for (j = 0; j < h - k; ++j) { \
                opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
            } \
        } \
    } \
}
1022
1023
static void opj_t1_dec_refpass_mqc_64x64(
1024
    opj_t1_t *t1,
1025
    OPJ_INT32 bpno)
1026
223k
{
1027
223k
    opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1028
223k
}
1029
1030
static void opj_t1_dec_refpass_mqc_generic(
1031
    opj_t1_t *t1,
1032
    OPJ_INT32 bpno)
1033
537k
{
1034
537k
    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1035
537k
}
1036
1037
static void opj_t1_dec_refpass_mqc(
1038
    opj_t1_t *t1,
1039
    OPJ_INT32 bpno)
1040
761k
{
1041
761k
    if (t1->w == 64 && t1->h == 64) {
1042
223k
        opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1043
537k
    } else {
1044
537k
        opj_t1_dec_refpass_mqc_generic(t1, bpno);
1045
537k
    }
1046
761k
}
1047
1048
/**
Encode clean-up pass step for one column (one flags word).
Rows runlen .. lim-1 are processed. When agg is non-zero, the first row
skips the zero-coding decision and goes straight to sign coding.
Relies on a variable named t1 being in scope at the expansion site.
*/
#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
{ \
    OPJ_UINT32 v; \
    OPJ_UINT32 ci; \
    opj_flag_t* const flagsp = (flagspIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
 \
    if ((*flagsp & check) == check) { \
        /* Every checked bit is set: nothing is coded, only PI bits of rows */ \
        /* at or after runlen are cleared */ \
        switch (runlen) { \
        case 0: \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
            break; \
        case 1: \
            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
            break; \
        case 2: \
            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
            break; \
        case 3: \
            *flagsp &= ~(T1_PI_3); \
            break; \
        default: \
            break; \
        } \
    } else { \
        for (ci = runlen; ci < lim; ++ci) { \
            OPJ_BOOL l_code_sign = OPJ_FALSE; \
            if ((agg != 0) && (ci == runlen)) { \
                l_code_sign = OPJ_TRUE; \
            } else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
/* #ifdef DEBUG_ENC_CLN */ \
/*            printf("   ctxt1=%d\n", ctxt1); */ \
/* #endif */ \
                opj_t1_setcurctx(curctx, ctxt1); \
                v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
                if (v) { \
                    l_code_sign = OPJ_TRUE; \
                } \
            } \
            if (l_code_sign) { \
                OPJ_UINT32 vsc; \
                OPJ_UINT32 ctxt2, spb; \
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                            *flagsp, \
                            flagsp[-1], flagsp[1], \
                            ci); \
                *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                                    (OPJ_UINT32)bpno); \
                ctxt2 = opj_t1_getctxno_sc(lu); \
/* #ifdef DEBUG_ENC_CLN */ \
/*           printf("   ctxt2=%d\n", ctxt2); */ \
/* #endif */ \
                opj_t1_setcurctx(curctx, ctxt2); \
 \
                v = opj_smr_sign(*l_datap); \
                spb = opj_t1_getspb(lu); \
/* #ifdef DEBUG_ENC_CLN */ \
/*           printf("   spb=%d\n", spb); */\
/* #endif */ \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
                opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
            } \
            *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
            l_datap ++; \
        } \
    } \
}
1117
1118
/**
Decode clean-up pass step for row `ci` of one flags word.
check_flags: when zero, the SIGMA/PI test on the row is bypassed.
partial:     when non-zero, the zero-coding decision is skipped and the
             sign is decoded directly.
The coefficient written is data[ci * data_stride].
*/
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                      flags, flagsp, flags_stride, data, \
                                      data_stride, ci, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc) \
{ \
    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
        OPJ_BOOL l_decode_sign = OPJ_TRUE; \
        if( !partial ) { \
            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
            opj_t1_setcurctx(curctx, ctxt1); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            /* a zero decision means there is no sign to decode for this row */ \
            if( !v ) { \
                l_decode_sign = OPJ_FALSE; \
            } \
        } \
        if( l_decode_sign ) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                flags, flagsp[-1], flagsp[1], \
                                ci); \
            opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v = v ^ opj_t1_getspb(lu); \
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
    } \
}
1145
1146
static void opj_t1_dec_clnpass_step(
1147
    opj_t1_t *t1,
1148
    opj_flag_t *flagsp,
1149
    OPJ_INT32 *datap,
1150
    OPJ_INT32 oneplushalf,
1151
    OPJ_UINT32 ci,
1152
    OPJ_UINT32 vsc)
1153
27.0M
{
1154
27.0M
    OPJ_UINT32 v;
1155
1156
27.0M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1157
27.0M
    opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1158
27.0M
                                  *flagsp, flagsp, t1->w + 2U, datap,
1159
27.0M
                                  0, ci, mqc, mqc->curctx,
1160
27.0M
                                  v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1161
27.0M
}
1162
1163
static void opj_t1_enc_clnpass(
1164
    opj_t1_t *t1,
1165
    OPJ_INT32 bpno,
1166
    OPJ_INT32 *nmsedec,
1167
    OPJ_UINT32 cblksty)
1168
0
{
1169
0
    OPJ_UINT32 i, k;
1170
0
    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1171
0
    opj_mqc_t* mqc = &(t1->mqc);
1172
0
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1173
0
    const OPJ_INT32* datap = t1->data;
1174
0
    opj_flag_t *f = &T1_FLAGS(0, 0);
1175
0
    const OPJ_UINT32 extra = 2U;
1176
1177
0
    *nmsedec = 0;
1178
#ifdef DEBUG_ENC_CLN
1179
    printf("enc_clnpass: bpno=%d\n", bpno);
1180
#endif
1181
0
    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1182
#ifdef DEBUG_ENC_CLN
1183
        printf(" k=%d\n", k);
1184
#endif
1185
0
        for (i = 0; i < t1->w; ++i, f++) {
1186
0
            OPJ_UINT32 agg, runlen;
1187
#ifdef DEBUG_ENC_CLN
1188
            printf("  i=%d\n", i);
1189
#endif
1190
0
            agg = !*f;
1191
#ifdef DEBUG_ENC_CLN
1192
            printf("   agg=%d\n", agg);
1193
#endif
1194
0
            if (agg) {
1195
0
                for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1196
0
                    if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1197
0
                        break;
1198
0
                    }
1199
0
                }
1200
0
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1201
0
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1202
0
                if (runlen == 4) {
1203
0
                    continue;
1204
0
                }
1205
0
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1206
0
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1207
0
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1208
0
            } else {
1209
0
                runlen = 0;
1210
0
            }
1211
0
            opj_t1_enc_clnpass_step_macro(
1212
0
                mqc, curctx, a, c, ct,
1213
0
                f,
1214
0
                datap,
1215
0
                bpno,
1216
0
                one,
1217
0
                nmsedec,
1218
0
                agg,
1219
0
                runlen,
1220
0
                4U,
1221
0
                cblksty);
1222
0
            datap += 4 - runlen;
1223
0
        }
1224
0
    }
1225
0
    if (k < t1->h) {
1226
0
        const OPJ_UINT32 agg = 0;
1227
0
        const OPJ_UINT32 runlen = 0;
1228
#ifdef DEBUG_ENC_CLN
1229
        printf(" k=%d\n", k);
1230
#endif
1231
0
        for (i = 0; i < t1->w; ++i, f++) {
1232
#ifdef DEBUG_ENC_CLN
1233
            printf("  i=%d\n", i);
1234
            printf("   agg=%d\n", agg);
1235
#endif
1236
0
            opj_t1_enc_clnpass_step_macro(
1237
0
                mqc, curctx, a, c, ct,
1238
0
                f,
1239
0
                datap,
1240
0
                bpno,
1241
0
                one,
1242
0
                nmsedec,
1243
0
                agg,
1244
0
                runlen,
1245
0
                t1->h - k,
1246
0
                cblksty);
1247
0
            datap += t1->h - k;
1248
0
        }
1249
0
    }
1250
1251
0
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1252
0
}
1253
1254
1.02M
/**
Body of the decode clean-up pass.
vsc, w, h and flags_stride are macro arguments so that specializations
with constant values can be generated. Full stripes work on local copies
of the MQ state; the state is written back before the last, incomplete
stripe, which goes through opj_t1_dec_clnpass_step().
*/
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
    OPJ_INT32 one, half, oneplushalf; \
    OPJ_UINT32 runlen; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = w; \
    opj_mqc_t* mqc = &(t1->mqc); \
    register OPJ_INT32 *data = t1->data; \
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    register OPJ_UINT32 v; \
    one = 1 << bpno; \
    half = one >> 1; \
    oneplushalf = one | half; \
    /* Full stripes of 4 rows */ \
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags != 0) { \
                /* Regular mode: each row is tested individually; vsc is */ \
                /* only forwarded for row 0 */ \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 0, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, vsc); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 1, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 2, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 3, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
            } else { \
                /* Aggregation mode: one AGG decision for the whole column */ \
                OPJ_UINT32 partial = OPJ_TRUE; \
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if (!v) { \
                    /* flags is still 0, so no write-back is needed */ \
                    continue; \
                } \
                /* Two UNI-coded bits give the first row to process */ \
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                runlen = (runlen << 1) | v; \
                /* Only the entry row is decoded as "partial"; the rows */ \
                /* reached by fall-through are decoded normally */ \
                switch(runlen) { \
                    case 0: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 0, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, vsc); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 1: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 1, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 2: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 2, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 3: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 3, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        break; \
                } \
            } \
            /* Store the updated flags with the PI bits of all 4 rows cleared */ \
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
    /* Write the MQ state back: the code below reads it from t1->mqc */ \
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
    if( k < h ) { \
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
            for (j = 0; j < h - k; ++j) { \
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
            } \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
}
1342
1343
/**
When the SEGSYM code-block style bit is set, consumes the 4 UNI-coded bits
that follow a clean-up pass. The assembled 4-bit value is currently not
checked (the warning below is disabled), but the bits must still be read
to keep the MQ decoder state in step.
@param t1       Tier-1 handle owning the MQ decoder
@param cblksty  code-block style flags
*/
static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
{
    opj_mqc_t* mqc;
    OPJ_UINT32 v, v2;
    OPJ_UINT32 l_bit;

    if (!(cblksty & J2K_CCP_CBLKSTY_SEGSYM)) {
        return;
    }

    mqc = &(t1->mqc);
    opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);

    /* Most significant bit first */
    v = 0;
    for (l_bit = 0; l_bit < 4U; ++l_bit) {
        opj_mqc_decode(v2, mqc);
        v = (v << 1) | v2;
    }
    /*
    if (v!=0xa) {
        opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
    }
    */
}
1363
1364
static void opj_t1_dec_clnpass_64x64_novsc(
1365
    opj_t1_t *t1,
1366
    OPJ_INT32 bpno)
1367
132k
{
1368
132k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1369
132k
}
1370
1371
static void opj_t1_dec_clnpass_64x64_vsc(
1372
    opj_t1_t *t1,
1373
    OPJ_INT32 bpno)
1374
200k
{
1375
200k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1376
200k
}
1377
1378
static void opj_t1_dec_clnpass_generic_novsc(
1379
    opj_t1_t *t1,
1380
    OPJ_INT32 bpno)
1381
271k
{
1382
271k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1383
271k
                                t1->w + 2U);
1384
271k
}
1385
1386
static void opj_t1_dec_clnpass_generic_vsc(
1387
    opj_t1_t *t1,
1388
    OPJ_INT32 bpno)
1389
419k
{
1390
419k
    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1391
419k
                                t1->w + 2U);
1392
419k
}
1393
1394
static void opj_t1_dec_clnpass(
1395
    opj_t1_t *t1,
1396
    OPJ_INT32 bpno,
1397
    OPJ_INT32 cblksty)
1398
1.02M
{
1399
1.02M
    if (t1->w == 64 && t1->h == 64) {
1400
333k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1401
200k
            opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1402
200k
        } else {
1403
132k
            opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1404
132k
        }
1405
690k
    } else {
1406
690k
        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1407
419k
            opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1408
419k
        } else {
1409
271k
            opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1410
271k
        }
1411
690k
    }
1412
1.02M
    opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1413
1.02M
}
1414
1415
1416
static OPJ_FLOAT64 opj_t1_getwmsedec(
1417
    OPJ_INT32 nmsedec,
1418
    OPJ_UINT32 compno,
1419
    OPJ_UINT32 level,
1420
    OPJ_UINT32 orient,
1421
    OPJ_INT32 bpno,
1422
    OPJ_UINT32 qmfbid,
1423
    OPJ_FLOAT64 stepsize,
1424
    OPJ_UINT32 numcomps,
1425
    const OPJ_FLOAT64 * mct_norms,
1426
    OPJ_UINT32 mct_numcomps)
1427
0
{
1428
0
    OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1429
0
    OPJ_ARG_NOT_USED(numcomps);
1430
1431
0
    if (mct_norms && (compno < mct_numcomps)) {
1432
0
        w1 = mct_norms[compno];
1433
0
    }
1434
1435
0
    if (qmfbid == 1) {
1436
0
        w2 = opj_dwt_getnorm(level, orient);
1437
0
    } else {    /* if (qmfbid == 0) */
1438
0
        const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1439
0
                                    (orient == 3) ? 2 : 1;
1440
0
        w2 = opj_dwt_getnorm_real(level, orient);
1441
        /* Not sure this is right. But preserves past behaviour */
1442
0
        stepsize /= (1 << log2_gain);
1443
0
    }
1444
1445
0
    wmsedec = w1 * w2 * stepsize * (1 << bpno);
1446
0
    wmsedec *= wmsedec * nmsedec / 8192.0;
1447
1448
0
    return wmsedec;
1449
0
}
1450
1451
static OPJ_BOOL opj_t1_allocate_buffers(
1452
    opj_t1_t *t1,
1453
    OPJ_UINT32 w,
1454
    OPJ_UINT32 h)
1455
1.81M
{
1456
1.81M
    OPJ_UINT32 flagssize;
1457
1.81M
    OPJ_UINT32 flags_stride;
1458
1459
    /* No risk of overflow. Prior checks ensure those assert are met */
1460
    /* They are per the specification */
1461
1.81M
    assert(w <= 1024);
1462
1.81M
    assert(h <= 1024);
1463
1.81M
    assert(w * h <= 4096);
1464
1465
    /* encoder uses tile buffer, so no need to allocate */
1466
1.81M
    {
1467
1.81M
        OPJ_UINT32 datasize = w * h;
1468
1469
1.81M
        if (datasize > t1->datasize) {
1470
1.91k
            opj_aligned_free(t1->data);
1471
1.91k
            t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1472
1.91k
            if (!t1->data) {
1473
                /* FIXME event manager error callback */
1474
0
                return OPJ_FALSE;
1475
0
            }
1476
1.91k
            t1->datasize = datasize;
1477
1.91k
        }
1478
        /* memset first arg is declared to never be null by gcc */
1479
1.81M
        if (t1->data != NULL) {
1480
1.81M
            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1481
1.81M
        }
1482
1.81M
    }
1483
1484
0
    flags_stride = w + 2U; /* can't be 0U */
1485
1486
1.81M
    flagssize = (h + 3U) / 4U + 2U;
1487
1488
1.81M
    flagssize *= flags_stride;
1489
1.81M
    {
1490
1.81M
        opj_flag_t* p;
1491
1.81M
        OPJ_UINT32 x;
1492
1.81M
        OPJ_UINT32 flags_height = (h + 3U) / 4U;
1493
1494
1.81M
        if (flagssize > t1->flagssize) {
1495
1496
37.3k
            opj_aligned_free(t1->flags);
1497
37.3k
            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1498
37.3k
                            opj_flag_t));
1499
37.3k
            if (!t1->flags) {
1500
                /* FIXME event manager error callback */
1501
0
                return OPJ_FALSE;
1502
0
            }
1503
37.3k
        }
1504
1.81M
        t1->flagssize = flagssize;
1505
1506
1.81M
        memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1507
1508
1.81M
        p = &t1->flags[0];
1509
388M
        for (x = 0; x < flags_stride; ++x) {
1510
            /* magic value to hopefully stop any passes being interested in this entry */
1511
386M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1512
386M
        }
1513
1514
1.81M
        p = &t1->flags[((flags_height + 1) * flags_stride)];
1515
388M
        for (x = 0; x < flags_stride; ++x) {
1516
            /* magic value to hopefully stop any passes being interested in this entry */
1517
386M
            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1518
386M
        }
1519
1520
1.81M
        if (h % 4) {
1521
69.1k
            OPJ_UINT32 v = 0;
1522
69.1k
            p = &t1->flags[((flags_height) * flags_stride)];
1523
69.1k
            if (h % 4 == 1) {
1524
4.00k
                v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1525
65.1k
            } else if (h % 4 == 2) {
1526
27.0k
                v |= T1_PI_2 | T1_PI_3;
1527
38.1k
            } else if (h % 4 == 3) {
1528
38.1k
                v |= T1_PI_3;
1529
38.1k
            }
1530
9.02M
            for (x = 0; x < flags_stride; ++x) {
1531
8.95M
                *p++ = v;
1532
8.95M
            }
1533
69.1k
        }
1534
1.81M
    }
1535
1536
0
    t1->w = w;
1537
1.81M
    t1->h = h;
1538
1539
1.81M
    return OPJ_TRUE;
1540
1.81M
}
1541
1542
/* ----------------------------------------------------------------------- */
1543
1544
/* ----------------------------------------------------------------------- */
1545
/**
1546
 * Creates a new Tier 1 handle
1547
 * and initializes the look-up tables of the Tier-1 coder/decoder
1548
 * @return a new T1 handle if successful, returns NULL otherwise
1549
*/
1550
opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1551
938
{
1552
938
    opj_t1_t *l_t1 = 00;
1553
1554
938
    l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1555
938
    if (!l_t1) {
1556
0
        return 00;
1557
0
    }
1558
1559
938
    l_t1->encoder = isEncoder;
1560
1561
938
    return l_t1;
1562
938
}
1563
1564
1565
/**
1566
 * Destroys a previously created T1 handle
1567
 *
1568
 * @param p_t1 Tier 1 handle to destroy
1569
*/
1570
void opj_t1_destroy(opj_t1_t *p_t1)
1571
938
{
1572
938
    if (! p_t1) {
1573
0
        return;
1574
0
    }
1575
1576
938
    if (p_t1->data) {
1577
938
        opj_aligned_free(p_t1->data);
1578
938
        p_t1->data = 00;
1579
938
    }
1580
1581
938
    if (p_t1->flags) {
1582
938
        opj_aligned_free(p_t1->flags);
1583
938
        p_t1->flags = 00;
1584
938
    }
1585
1586
938
    opj_free(p_t1->cblkdatabuffer);
1587
1588
938
    opj_free(p_t1);
1589
938
}
1590
1591
typedef struct {
1592
    OPJ_BOOL whole_tile_decoding;
1593
    OPJ_UINT32 resno;
1594
    opj_tcd_cblk_dec_t* cblk;
1595
    opj_tcd_band_t* band;
1596
    opj_tcd_tilecomp_t* tilec;
1597
    opj_tccp_t* tccp;
1598
    OPJ_BOOL mustuse_cblkdatabuffer;
1599
    volatile OPJ_BOOL* pret;
1600
    opj_event_mgr_t *p_manager;
1601
    opj_mutex_t* p_manager_mutex;
1602
    OPJ_BOOL check_pterm;
1603
} opj_t1_cblk_decode_processing_job_t;
1604
1605
static void opj_t1_destroy_wrapper(void* t1)
1606
938
{
1607
938
    opj_t1_destroy((opj_t1_t*) t1);
1608
938
}
1609
1610
/**
 * Procedure to deal with an asynchronous code-block decoding job.
 *
 * Decodes job->cblk with the HT or the regular Tier-1 decoder (selected by
 * J2K_CCP_CBLKSTY_HT in tccp->cblksty), undoes the ROI shift when
 * tccp->roishift is non zero, and then stores the coefficients:
 *  - in cblk->decoded_data when that buffer is set (allocated here when
 *    job->whole_tile_decoding is false),
 *  - otherwise in tilec->data at row y / column x of the band inside the
 *    tile-component buffer.
 * With tccp->qmfbid == 1 values are halved with an integer division,
 * otherwise they are multiplied by 0.5f * band->stepsize and stored as
 * floats.
 *
 * On failure *(job->pret) is set to OPJ_FALSE. The job structure is released
 * with opj_free() on every exit path. Every call to opj_event_msg() is made
 * with job->p_manager_mutex held (when it is not NULL), since jobs may
 * report errors concurrently.
 *
 * @param user_data Pointer to a opj_t1_cblk_decode_processing_job_t structure
 * @param tls       TLS handle, used to cache the T1 handle of the thread
 */
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
{
    opj_tcd_cblk_dec_t* cblk;
    opj_tcd_band_t* band;
    opj_tcd_tilecomp_t* tilec;
    opj_tccp_t* tccp;
    OPJ_INT32* OPJ_RESTRICT datap;
    OPJ_UINT32 cblk_w, cblk_h;
    OPJ_INT32 x, y;
    OPJ_UINT32 i, j;
    opj_t1_cblk_decode_processing_job_t* job;
    opj_t1_t* t1;
    OPJ_UINT32 resno;
    OPJ_UINT32 tile_w;

    job = (opj_t1_cblk_decode_processing_job_t*) user_data;

    cblk = job->cblk;

    if (!job->whole_tile_decoding) {
        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);

        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
                             cblk_w * cblk_h);
        if (cblk->decoded_data == NULL) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate cblk->decoded_data\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        /* Zero-init required */
        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
    } else if (cblk->decoded_data) {
        /* Not sure if that code path can happen, but better be */
        /* safe than sorry */
        opj_aligned_free(cblk->decoded_data);
        cblk->decoded_data = NULL;
    }

    resno = job->resno;
    band = job->band;
    tilec = job->tilec;
    tccp = job->tccp;
    /* Row stride of tilec->data: width of the highest decoded resolution */
    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
                          -
                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);

    /* Another job already failed: do not do any further work */
    if (!*(job->pret)) {
        opj_free(job);
        return;
    }

    /* Fetch the T1 handle cached in TLS, creating it on first use */
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
    if (t1 == NULL) {
        t1 = opj_t1_create(OPJ_FALSE);
        if (t1 == NULL) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Cannot allocate Tier 1 handle\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
            if (job->p_manager_mutex) {
                opj_mutex_lock(job->p_manager_mutex);
            }
            opj_event_msg(job->p_manager, EVT_ERROR,
                          "Unable to set t1 handle as TLS\n");
            if (job->p_manager_mutex) {
                opj_mutex_unlock(job->p_manager_mutex);
            }
            opj_t1_destroy(t1);
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }
    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;

    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    } else {
        if (OPJ_FALSE == opj_t1_decode_cblk(
                    t1,
                    cblk,
                    band->bandno,
                    (OPJ_UINT32)tccp->roishift,
                    tccp->cblksty,
                    job->p_manager,
                    job->p_manager_mutex,
                    job->check_pterm)) {
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
    }

    /* Position of the code-block inside the tile-component buffer: offset */
    /* in the band, shifted by the size of the previous resolution for */
    /* bands with bit 0 (x) or bit 1 (y) set in bandno */
    x = cblk->x0 - band->x0;
    y = cblk->y0 - band->y0;
    if (band->bandno & 1) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        x += pres->x1 - pres->x0;
    }
    if (band->bandno & 2) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        y += pres->y1 - pres->y0;
    }

    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
    cblk_w = t1->w;
    cblk_h = t1->h;

    if (tccp->roishift) {
        if (tccp->roishift >= 31) {
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    datap[(j * cblk_w) + i] = 0;
                }
            }
        } else {
            /* Scale back the magnitude of values at or above the threshold */
            OPJ_INT32 thresh = 1 << tccp->roishift;
            for (j = 0; j < cblk_h; ++j) {
                for (i = 0; i < cblk_w; ++i) {
                    OPJ_INT32 val = datap[(j * cblk_w) + i];
                    OPJ_INT32 mag = abs(val);
                    if (mag >= thresh) {
                        mag >>= tccp->roishift;
                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
                    }
                }
            }
        }
    }

    /* Both can be non NULL if for example decoding a full tile and then */
    /* partially a tile. In which case partial decoding should be the */
    /* priority */
    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));

    if (cblk->decoded_data) {
        /* In-place conversion inside the code-block private buffer */
        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
        if (tccp->qmfbid == 1) {
            for (i = 0; i < cblk_size; ++i) {
                datap[i] /= 2;
            }
        } else {        /* if (tccp->qmfbid == 0) */
            const float stepsize = 0.5f * band->stepsize;
            i = 0;
#ifdef __SSE2__
            {
                const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
                for (; i < (cblk_size & ~15U); i += 16) {
                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 0)));
                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 4)));
                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 8)));
                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
                                                           datap + 12)));
                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
                    datap += 16;
                }
            }
#endif
            /* Scalar tail (or whole buffer without SSE2); memcpy stores the */
            /* float bit pattern into the OPJ_INT32 slot */
            for (; i < cblk_size; ++i) {
                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
                memcpy(datap, &tmp, sizeof(tmp));
                datap++;
            }
        }
    } else if (tccp->qmfbid == 1) {
        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
                                                       (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            //positive -> round down aka.  (83)/2 =  41.5 ->  41
            //negative -> round up   aka. (-83)/2 = -41.5 -> -41
#if defined(__AVX512F__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 16; ++i) {
                __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
                const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
                in_avx = _mm512_add_epi32(in_avx, add_avx);
                _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
                ptr_in += 16;
                ptr_out += 16;
            }

            for (i = 0; i < cblk_w % 16; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#elif defined(__AVX2__)
            OPJ_INT32* ptr_in = datap + (j * cblk_w);
            OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
            for (i = 0; i < cblk_w / 8; ++i) {
                __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
                const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
                in_avx = _mm256_add_epi32(in_avx, add_avx);
                _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
                ptr_in += 8;
                ptr_out += 8;
            }

            for (i = 0; i < cblk_w % 8; ++i) {
                ptr_out[i] = ptr_in[i] / 2;
            }
#else
            i = 0;
            for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
            }
            for (; i < cblk_w; ++i) {
                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
            }
#endif
        }
    } else {        /* if (tccp->qmfbid == 0) */
        const float stepsize = 0.5f * band->stepsize;
        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
                                                         tile_w + (OPJ_SIZE_T)x];
        for (j = 0; j < cblk_h; ++j) {
            OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
            for (i = 0; i < cblk_w; ++i) {
                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
                *tiledp2 = tmp;
                datap++;
                tiledp2++;
            }
            tiledp += tile_w;
        }
    }

    opj_free(job);
}
1870
1871
1872
void opj_t1_decode_cblks(opj_tcd_t* tcd,
1873
                         volatile OPJ_BOOL* pret,
1874
                         opj_tcd_tilecomp_t* tilec,
1875
                         opj_tccp_t* tccp,
1876
                         opj_event_mgr_t *p_manager,
1877
                         opj_mutex_t* p_manager_mutex,
1878
                         OPJ_BOOL check_pterm
1879
                        )
1880
4.87k
{
1881
4.87k
    opj_thread_pool_t* tp = tcd->thread_pool;
1882
4.87k
    OPJ_UINT32 resno, bandno, precno, cblkno;
1883
1884
#ifdef DEBUG_VERBOSE
1885
    OPJ_UINT32 codeblocks_decoded = 0;
1886
    printf("Enter opj_t1_decode_cblks()\n");
1887
#endif
1888
1889
11.8k
    for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1890
7.00k
        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1891
1892
18.2k
        for (bandno = 0; bandno < res->numbands; ++bandno) {
1893
11.2k
            opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1894
1895
234k
            for (precno = 0; precno < res->pw * res->ph; ++precno) {
1896
223k
                opj_tcd_precinct_t* precinct = &band->precincts[precno];
1897
1898
223k
                if (!opj_tcd_is_subband_area_of_interest(tcd,
1899
223k
                        tilec->compno,
1900
223k
                        resno,
1901
223k
                        band->bandno,
1902
223k
                        (OPJ_UINT32)precinct->x0,
1903
223k
                        (OPJ_UINT32)precinct->y0,
1904
223k
                        (OPJ_UINT32)precinct->x1,
1905
223k
                        (OPJ_UINT32)precinct->y1)) {
1906
247
                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1907
0
                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1908
0
                        if (cblk->decoded_data) {
1909
#ifdef DEBUG_VERBOSE
1910
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1911
                                   cblk->x0, cblk->y0, resno, bandno);
1912
#endif
1913
0
                            opj_aligned_free(cblk->decoded_data);
1914
0
                            cblk->decoded_data = NULL;
1915
0
                        }
1916
0
                    }
1917
247
                    continue;
1918
247
                }
1919
1920
2.03M
                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1921
1.81M
                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1922
1.81M
                    opj_t1_cblk_decode_processing_job_t* job;
1923
1924
1.81M
                    if (!opj_tcd_is_subband_area_of_interest(tcd,
1925
1.81M
                            tilec->compno,
1926
1.81M
                            resno,
1927
1.81M
                            band->bandno,
1928
1.81M
                            (OPJ_UINT32)cblk->x0,
1929
1.81M
                            (OPJ_UINT32)cblk->y0,
1930
1.81M
                            (OPJ_UINT32)cblk->x1,
1931
1.81M
                            (OPJ_UINT32)cblk->y1)) {
1932
0
                        if (cblk->decoded_data) {
1933
#ifdef DEBUG_VERBOSE
1934
                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1935
                                   cblk->x0, cblk->y0, resno, bandno);
1936
#endif
1937
0
                            opj_aligned_free(cblk->decoded_data);
1938
0
                            cblk->decoded_data = NULL;
1939
0
                        }
1940
0
                        continue;
1941
0
                    }
1942
1943
1.81M
                    if (!tcd->whole_tile_decoding) {
1944
0
                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1945
0
                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1946
0
                        if (cblk->decoded_data != NULL) {
1947
#ifdef DEBUG_VERBOSE
1948
                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1949
                                   cblk->x0, cblk->y0, resno, bandno);
1950
#endif
1951
0
                            continue;
1952
0
                        }
1953
0
                        if (cblk_w == 0 || cblk_h == 0) {
1954
0
                            continue;
1955
0
                        }
1956
#ifdef DEBUG_VERBOSE
1957
                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1958
                               cblk->x0, cblk->y0, resno, bandno);
1959
#endif
1960
0
                    }
1961
1962
1.81M
                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1963
1.81M
                            sizeof(opj_t1_cblk_decode_processing_job_t));
1964
1.81M
                    if (!job) {
1965
0
                        *pret = OPJ_FALSE;
1966
0
                        return;
1967
0
                    }
1968
1.81M
                    job->whole_tile_decoding = tcd->whole_tile_decoding;
1969
1.81M
                    job->resno = resno;
1970
1.81M
                    job->cblk = cblk;
1971
1.81M
                    job->band = band;
1972
1.81M
                    job->tilec = tilec;
1973
1.81M
                    job->tccp = tccp;
1974
1.81M
                    job->pret = pret;
1975
1.81M
                    job->p_manager_mutex = p_manager_mutex;
1976
1.81M
                    job->p_manager = p_manager;
1977
1.81M
                    job->check_pterm = check_pterm;
1978
1.81M
                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1979
1.81M
                    opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1980
#ifdef DEBUG_VERBOSE
1981
                    codeblocks_decoded ++;
1982
#endif
1983
1.81M
                    if (!(*pret)) {
1984
30
                        return;
1985
30
                    }
1986
1.81M
                } /* cblkno */
1987
223k
            } /* precno */
1988
11.2k
        } /* bandno */
1989
7.00k
    } /* resno */
1990
1991
#ifdef DEBUG_VERBOSE
1992
    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1993
#endif
1994
4.84k
    return;
1995
4.87k
}
1996
1997
1998
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1999
                                   opj_tcd_cblk_dec_t* cblk,
2000
                                   OPJ_UINT32 orient,
2001
                                   OPJ_UINT32 roishift,
2002
                                   OPJ_UINT32 cblksty,
2003
                                   opj_event_mgr_t *p_manager,
2004
                                   opj_mutex_t* p_manager_mutex,
2005
                                   OPJ_BOOL check_pterm)
2006
1.81M
{
2007
1.81M
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2008
2009
1.81M
    OPJ_INT32 bpno_plus_one;
2010
1.81M
    OPJ_UINT32 passtype;
2011
1.81M
    OPJ_UINT32 segno, passno;
2012
1.81M
    OPJ_BYTE* cblkdata = NULL;
2013
1.81M
    OPJ_UINT32 cblkdataindex = 0;
2014
1.81M
    OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
2015
1.81M
    OPJ_INT32* original_t1_data = NULL;
2016
2017
1.81M
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2018
2019
1.81M
    if (!opj_t1_allocate_buffers(
2020
1.81M
                t1,
2021
1.81M
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2022
1.81M
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2023
0
        return OPJ_FALSE;
2024
0
    }
2025
2026
1.81M
    bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
2027
1.81M
    if (bpno_plus_one >= 31) {
2028
1
        if (p_manager_mutex) {
2029
1
            opj_mutex_lock(p_manager_mutex);
2030
1
        }
2031
1
        opj_event_msg(p_manager, EVT_WARNING,
2032
1
                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
2033
1
                      bpno_plus_one);
2034
1
        if (p_manager_mutex) {
2035
1
            opj_mutex_unlock(p_manager_mutex);
2036
1
        }
2037
1
        return OPJ_FALSE;
2038
1
    }
2039
1.81M
    passtype = 2;
2040
2041
1.81M
    opj_mqc_resetstates(mqc);
2042
1.81M
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2043
1.81M
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2044
1.81M
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2045
2046
1.81M
    if (cblk->corrupted) {
2047
0
        assert(cblk->numchunks == 0);
2048
0
        return OPJ_TRUE;
2049
0
    }
2050
2051
    /* Even if we have a single chunk, in multi-threaded decoding */
2052
    /* the insertion of our synthetic marker might potentially override */
2053
    /* valid codestream of other codeblocks decoded in parallel. */
2054
1.81M
    if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
2055
1.70M
                                cblk->numchunks > 0)) {
2056
109k
        OPJ_UINT32 i;
2057
109k
        OPJ_UINT32 cblk_len;
2058
2059
        /* Compute whole codeblock length from chunk lengths */
2060
109k
        cblk_len = 0;
2061
539k
        for (i = 0; i < cblk->numchunks; i++) {
2062
430k
            cblk_len += cblk->chunks[i].len;
2063
430k
        }
2064
2065
        /* Allocate temporary memory if needed */
2066
109k
        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
2067
2.16k
            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
2068
2.16k
                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
2069
2.16k
            if (cblkdata == NULL) {
2070
0
                return OPJ_FALSE;
2071
0
            }
2072
2.16k
            t1->cblkdatabuffer = cblkdata;
2073
2.16k
            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2074
2.16k
            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2075
2.16k
        }
2076
2077
        /* Concatenate all chunks */
2078
109k
        cblkdata = t1->cblkdatabuffer;
2079
109k
        cblk_len = 0;
2080
539k
        for (i = 0; i < cblk->numchunks; i++) {
2081
430k
            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2082
430k
            cblk_len += cblk->chunks[i].len;
2083
430k
        }
2084
1.70M
    } else if (cblk->numchunks == 1) {
2085
175k
        cblkdata = cblk->chunks[0].data;
2086
1.52M
    } else {
2087
        /* Not sure if that can happen in practice, but avoid Coverity to */
2088
        /* think we will dereference a null cblkdta pointer */
2089
1.52M
        return OPJ_TRUE;
2090
1.52M
    }
2091
2092
    /* For subtile decoding, directly decode in the decoded_data buffer of */
2093
    /* the code-block. Hack t1->data to point to it, and restore it later */
2094
284k
    if (cblk->decoded_data) {
2095
0
        original_t1_data = t1->data;
2096
0
        t1->data = cblk->decoded_data;
2097
0
    }
2098
2099
716k
    for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2100
431k
        opj_tcd_seg_t *seg = &cblk->segs[segno];
2101
2102
        /* BYPASS mode */
2103
431k
        type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2104
431k
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2105
2106
431k
        if (type == T1_TYPE_RAW) {
2107
57.3k
            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2108
57.3k
                                 OPJ_COMMON_CBLK_DATA_EXTRA);
2109
374k
        } else {
2110
374k
            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2111
374k
                             OPJ_COMMON_CBLK_DATA_EXTRA);
2112
374k
        }
2113
431k
        cblkdataindex += seg->len;
2114
2115
3.10M
        for (passno = 0; (passno < seg->real_num_passes) &&
2116
3.10M
                (bpno_plus_one >= 1); ++passno) {
2117
2.67M
            switch (passtype) {
2118
853k
            case 0:
2119
853k
                if (type == T1_TYPE_RAW) {
2120
37.9k
                    opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2121
815k
                } else {
2122
815k
                    opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2123
815k
                }
2124
853k
                break;
2125
798k
            case 1:
2126
798k
                if (type == T1_TYPE_RAW) {
2127
36.3k
                    opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2128
761k
                } else {
2129
761k
                    opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2130
761k
                }
2131
798k
                break;
2132
1.02M
            case 2:
2133
1.02M
                opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2134
1.02M
                break;
2135
2.67M
            }
2136
2137
2.67M
            if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2138
1.38M
                opj_mqc_resetstates(mqc);
2139
1.38M
                opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2140
1.38M
                opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2141
1.38M
                opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2142
1.38M
            }
2143
2.67M
            if (++passtype == 3) {
2144
1.02M
                passtype = 0;
2145
1.02M
                bpno_plus_one--;
2146
1.02M
            }
2147
2.67M
        }
2148
2149
431k
        opq_mqc_finish_dec(mqc);
2150
431k
    }
2151
2152
284k
    if (check_pterm) {
2153
149k
        if (mqc->bp + 2 < mqc->end) {
2154
31.0k
            if (p_manager_mutex) {
2155
31.0k
                opj_mutex_lock(p_manager_mutex);
2156
31.0k
            }
2157
31.0k
            opj_event_msg(p_manager, EVT_WARNING,
2158
31.0k
                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2159
31.0k
                          (int)(mqc->end - mqc->bp) - 2,
2160
31.0k
                          (int)(mqc->bp - mqc->start),
2161
31.0k
                          (int)(mqc->end - mqc->start));
2162
31.0k
            if (p_manager_mutex) {
2163
31.0k
                opj_mutex_unlock(p_manager_mutex);
2164
31.0k
            }
2165
118k
        } else if (mqc->end_of_byte_stream_counter > 2) {
2166
107k
            if (p_manager_mutex) {
2167
107k
                opj_mutex_lock(p_manager_mutex);
2168
107k
            }
2169
107k
            opj_event_msg(p_manager, EVT_WARNING,
2170
107k
                          "PTERM check failure: %d synthesized 0xFF markers read\n",
2171
107k
                          mqc->end_of_byte_stream_counter);
2172
107k
            if (p_manager_mutex) {
2173
107k
                opj_mutex_unlock(p_manager_mutex);
2174
107k
            }
2175
107k
        }
2176
149k
    }
2177
2178
    /* Restore original t1->data is needed */
2179
284k
    if (cblk->decoded_data) {
2180
0
        t1->data = original_t1_data;
2181
0
    }
2182
2183
284k
    return OPJ_TRUE;
2184
284k
}
2185
2186
2187
typedef struct {
2188
    OPJ_UINT32 compno;
2189
    OPJ_UINT32 resno;
2190
    opj_tcd_cblk_enc_t* cblk;
2191
    opj_tcd_tile_t *tile;
2192
    opj_tcd_band_t* band;
2193
    opj_tcd_tilecomp_t* tilec;
2194
    opj_tccp_t* tccp;
2195
    const OPJ_FLOAT64 * mct_norms;
2196
    OPJ_UINT32 mct_numcomps;
2197
    volatile OPJ_BOOL* pret;
2198
    opj_mutex_t* mutex;
2199
} opj_t1_cblk_encode_processing_job_t;
2200
2201
/** Procedure to deal with an asynchronous code-block encoding job.
2202
 *
2203
 * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2204
 * @param tls       TLS handle.
2205
 */
2206
static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
2207
0
{
2208
0
    opj_t1_cblk_encode_processing_job_t* job =
2209
0
        (opj_t1_cblk_encode_processing_job_t*)user_data;
2210
0
    opj_tcd_cblk_enc_t* cblk = job->cblk;
2211
0
    const opj_tcd_band_t* band = job->band;
2212
0
    const opj_tcd_tilecomp_t* tilec = job->tilec;
2213
0
    const opj_tccp_t* tccp = job->tccp;
2214
0
    const OPJ_UINT32 resno = job->resno;
2215
0
    opj_t1_t* t1;
2216
0
    const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2217
2218
0
    OPJ_INT32* OPJ_RESTRICT tiledp;
2219
0
    OPJ_UINT32 cblk_w;
2220
0
    OPJ_UINT32 cblk_h;
2221
0
    OPJ_UINT32 i, j;
2222
2223
0
    OPJ_INT32 x = cblk->x0 - band->x0;
2224
0
    OPJ_INT32 y = cblk->y0 - band->y0;
2225
2226
0
    if (!*(job->pret)) {
2227
0
        opj_free(job);
2228
0
        return;
2229
0
    }
2230
2231
0
    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2232
0
    if (t1 == NULL) {
2233
0
        t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2234
0
        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2235
0
    }
2236
2237
0
    if (band->bandno & 1) {
2238
0
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2239
0
        x += pres->x1 - pres->x0;
2240
0
    }
2241
0
    if (band->bandno & 2) {
2242
0
        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2243
0
        y += pres->y1 - pres->y0;
2244
0
    }
2245
2246
0
    if (!opj_t1_allocate_buffers(
2247
0
                t1,
2248
0
                (OPJ_UINT32)(cblk->x1 - cblk->x0),
2249
0
                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2250
0
        *(job->pret) = OPJ_FALSE;
2251
0
        opj_free(job);
2252
0
        return;
2253
0
    }
2254
2255
0
    cblk_w = t1->w;
2256
0
    cblk_h = t1->h;
2257
2258
0
    tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2259
2260
0
    if (tccp->qmfbid == 1) {
2261
        /* Do multiplication on unsigned type, even if the
2262
            * underlying type is signed, to avoid potential
2263
            * int overflow on large value (the output will be
2264
            * incorrect in such situation, but whatever...)
2265
            * This assumes complement-to-2 signed integer
2266
            * representation
2267
            * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2268
            */
2269
0
        OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2270
0
        OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2271
        /* Change from "natural" order to "zigzag" order of T1 passes */
2272
0
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2273
#if defined(__AVX512F__)
2274
            const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
2275
            const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
2276
            OPJ_UINT32* ptr = tiledp_u;
2277
            for (i = 0; i < cblk_w / 16; ++i) {
2278
                //                      INPUT                                        OUTPUT
2279
                // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F   00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
2280
                // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F   04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
2281
                // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F   08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
2282
                // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F   0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
2283
                __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2284
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2285
                __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2286
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2287
                __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2288
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2289
                __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
2290
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2291
2292
                __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
2293
                __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
2294
                __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
2295
                __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
2296
2297
                in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
2298
                in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
2299
                in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
2300
                in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
2301
2302
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
2303
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
2304
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
2305
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
2306
2307
                tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
2308
                tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
2309
2310
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
2311
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
2312
                _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
2313
                                    0x1));
2314
                _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
2315
                                    0x1));
2316
                _mm256_storeu_si256((__m256i*)(t1data + 48),
2317
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
2318
                _mm256_storeu_si256((__m256i*)(t1data + 56),
2319
                                    _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
2320
                t1data += 64;
2321
                ptr += 16;
2322
            }
2323
            for (i = 0; i < cblk_w % 16; ++i) {
2324
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2325
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2326
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2327
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2328
                t1data += 4;
2329
                ptr += 1;
2330
            }
2331
#elif defined(__AVX2__)
2332
            OPJ_UINT32* ptr = tiledp_u;
2333
            for (i = 0; i < cblk_w / 8; ++i) {
2334
                //          INPUT                  OUTPUT
2335
                // 00 01 02 03 04 05 06 07   00 10 20 30 01 11 21 31
2336
                // 10 11 12 13 14 15 16 17   02 12 22 32 03 13 23 33
2337
                // 20 21 22 23 24 25 26 27   04 14 24 34 05 15 25 35
2338
                // 30 31 32 33 34 35 36 37   06 16 26 36 07 17 27 37
2339
                __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2340
                                                (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
2341
                __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2342
                                                (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
2343
                __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2344
                                                (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
2345
                __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
2346
                                                (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
2347
2348
                __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
2349
                __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
2350
                __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
2351
                __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
2352
2353
                in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
2354
                in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
2355
                in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
2356
                in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
2357
2358
                _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
2359
                _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
2360
                _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
2361
                _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
2362
                _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
2363
                                    0x31));
2364
                _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
2365
                                    0x31));
2366
                t1data += 32;
2367
                ptr += 8;
2368
            }
2369
            for (i = 0; i < cblk_w % 8; ++i) {
2370
                t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
2371
                t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
2372
                t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
2373
                t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
2374
                t1data += 4;
2375
                ptr += 1;
2376
            }
2377
#else
2378
0
            for (i = 0; i < cblk_w; ++i) {
2379
0
                t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2380
0
                t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2381
0
                t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2382
0
                t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2383
0
                t1data += 4;
2384
0
            }
2385
0
#endif
2386
0
        }
2387
0
        if (j < cblk_h) {
2388
0
            for (i = 0; i < cblk_w; ++i) {
2389
0
                OPJ_UINT32 k;
2390
0
                for (k = j; k < cblk_h; k++) {
2391
0
                    t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2392
0
                    t1data ++;
2393
0
                }
2394
0
            }
2395
0
        }
2396
0
    } else {        /* if (tccp->qmfbid == 0) */
2397
0
        OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2398
0
        OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2399
        /* Change from "natural" order to "zigzag" order of T1 passes */
2400
0
        for (j = 0; j < (cblk_h & ~3U); j += 4) {
2401
0
            for (i = 0; i < cblk_w; ++i) {
2402
0
                t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2403
0
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2404
0
                t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2405
0
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2406
0
                t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2407
0
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2408
0
                t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2409
0
                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2410
0
                t1data += 4;
2411
0
            }
2412
0
        }
2413
0
        if (j < cblk_h) {
2414
0
            for (i = 0; i < cblk_w; ++i) {
2415
0
                OPJ_UINT32 k;
2416
0
                for (k = j; k < cblk_h; k++) {
2417
0
                    t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2418
0
                                                      * (1 << T1_NMSEDEC_FRACBITS));
2419
0
                    t1data ++;
2420
0
                }
2421
0
            }
2422
0
        }
2423
0
    }
2424
2425
0
    {
2426
0
        OPJ_FLOAT64 cumwmsedec =
2427
0
            opj_t1_encode_cblk(
2428
0
                t1,
2429
0
                cblk,
2430
0
                band->bandno,
2431
0
                job->compno,
2432
0
                tilec->numresolutions - 1 - resno,
2433
0
                tccp->qmfbid,
2434
0
                band->stepsize,
2435
0
                tccp->cblksty,
2436
0
                job->tile->numcomps,
2437
0
                job->mct_norms,
2438
0
                job->mct_numcomps);
2439
0
        if (job->mutex) {
2440
0
            opj_mutex_lock(job->mutex);
2441
0
        }
2442
0
        job->tile->distotile += cumwmsedec;
2443
0
        if (job->mutex) {
2444
0
            opj_mutex_unlock(job->mutex);
2445
0
        }
2446
0
    }
2447
2448
0
    opj_free(job);
2449
0
}
2450
2451
2452
OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2453
                             opj_tcd_tile_t *tile,
2454
                             opj_tcp_t *tcp,
2455
                             const OPJ_FLOAT64 * mct_norms,
2456
                             OPJ_UINT32 mct_numcomps
2457
                            )
2458
0
{
2459
0
    volatile OPJ_BOOL ret = OPJ_TRUE;
2460
0
    opj_thread_pool_t* tp = tcd->thread_pool;
2461
0
    OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2462
0
    opj_mutex_t* mutex = opj_mutex_create();
2463
2464
0
    tile->distotile = 0;
2465
2466
0
    for (compno = 0; compno < tile->numcomps; ++compno) {
2467
0
        opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2468
0
        opj_tccp_t* tccp = &tcp->tccps[compno];
2469
2470
0
        for (resno = 0; resno < tilec->numresolutions; ++resno) {
2471
0
            opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2472
2473
0
            for (bandno = 0; bandno < res->numbands; ++bandno) {
2474
0
                opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2475
2476
                /* Skip empty bands */
2477
0
                if (opj_tcd_is_band_empty(band)) {
2478
0
                    continue;
2479
0
                }
2480
0
                for (precno = 0; precno < res->pw * res->ph; ++precno) {
2481
0
                    opj_tcd_precinct_t *prc = &band->precincts[precno];
2482
2483
0
                    for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2484
0
                        opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2485
2486
0
                        opj_t1_cblk_encode_processing_job_t* job =
2487
0
                            (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2488
0
                                    sizeof(opj_t1_cblk_encode_processing_job_t));
2489
0
                        if (!job) {
2490
0
                            ret = OPJ_FALSE;
2491
0
                            goto end;
2492
0
                        }
2493
0
                        job->compno = compno;
2494
0
                        job->tile = tile;
2495
0
                        job->resno = resno;
2496
0
                        job->cblk = cblk;
2497
0
                        job->band = band;
2498
0
                        job->tilec = tilec;
2499
0
                        job->tccp = tccp;
2500
0
                        job->mct_norms = mct_norms;
2501
0
                        job->mct_numcomps = mct_numcomps;
2502
0
                        job->pret = &ret;
2503
0
                        job->mutex = mutex;
2504
0
                        opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
2505
2506
0
                    } /* cblkno */
2507
0
                } /* precno */
2508
0
            } /* bandno */
2509
0
        } /* resno  */
2510
0
    } /* compno  */
2511
2512
0
end:
2513
0
    opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2514
0
    if (mutex) {
2515
0
        opj_mutex_destroy(mutex);
2516
0
    }
2517
2518
0
    return ret;
2519
0
}
2520
2521
/* Returns whether the pass (bpno, passtype) is terminated */
2522
static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2523
                                   OPJ_UINT32 cblksty,
2524
                                   OPJ_INT32 bpno,
2525
                                   OPJ_UINT32 passtype)
2526
0
{
2527
    /* Is it the last cleanup pass ? */
2528
0
    if (passtype == 2 && bpno == 0) {
2529
0
        return OPJ_TRUE;
2530
0
    }
2531
2532
0
    if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2533
0
        return OPJ_TRUE;
2534
0
    }
2535
2536
0
    if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2537
        /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2538
0
        if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2539
0
            return OPJ_TRUE;
2540
0
        }
2541
        /* and beyond terminate all the magnitude refinement passes (in raw) */
2542
        /* and cleanup passes (in MQC) */
2543
0
        if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2544
0
            return OPJ_TRUE;
2545
0
        }
2546
0
    }
2547
2548
0
    return OPJ_FALSE;
2549
0
}
2550
2551
2552
static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2553
                                      opj_tcd_cblk_enc_t* cblk,
2554
                                      OPJ_UINT32 orient,
2555
                                      OPJ_UINT32 compno,
2556
                                      OPJ_UINT32 level,
2557
                                      OPJ_UINT32 qmfbid,
2558
                                      OPJ_FLOAT64 stepsize,
2559
                                      OPJ_UINT32 cblksty,
2560
                                      OPJ_UINT32 numcomps,
2561
                                      const OPJ_FLOAT64 * mct_norms,
2562
                                      OPJ_UINT32 mct_numcomps)
2563
0
{
2564
0
    OPJ_FLOAT64 cumwmsedec = 0.0;
2565
2566
0
    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2567
2568
0
    OPJ_UINT32 passno;
2569
0
    OPJ_INT32 bpno;
2570
0
    OPJ_UINT32 passtype;
2571
0
    OPJ_INT32 nmsedec = 0;
2572
0
    OPJ_INT32 max;
2573
0
    OPJ_UINT32 i, j;
2574
0
    OPJ_BYTE type = T1_TYPE_MQ;
2575
0
    OPJ_FLOAT64 tempwmsedec;
2576
0
    OPJ_INT32* datap;
2577
2578
#ifdef EXTRA_DEBUG
2579
    printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2580
           cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2581
#endif
2582
2583
0
    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2584
2585
0
    max = 0;
2586
0
    datap = t1->data;
2587
0
    for (j = 0; j < t1->h; ++j) {
2588
0
        const OPJ_UINT32 w = t1->w;
2589
0
        for (i = 0; i < w; ++i, ++datap) {
2590
0
            OPJ_INT32 tmp = *datap;
2591
0
            if (tmp < 0) {
2592
0
                OPJ_UINT32 tmp_unsigned;
2593
0
                if (tmp == INT_MIN) {
2594
                    /* To avoid undefined behaviour when negating INT_MIN */
2595
                    /* but if we go here, it means we have supplied an input */
2596
                    /* with more bit depth than we we can really support. */
2597
                    /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
2598
0
                    tmp = INT_MIN + 1;
2599
0
                }
2600
0
                max = opj_int_max(max, -tmp);
2601
0
                tmp_unsigned = opj_to_smr(tmp);
2602
0
                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2603
0
            } else {
2604
0
                max = opj_int_max(max, tmp);
2605
0
            }
2606
0
        }
2607
0
    }
2608
2609
0
    cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2610
0
                                      T1_NMSEDEC_FRACBITS) : 0;
2611
0
    if (cblk->numbps == 0) {
2612
0
        cblk->totalpasses = 0;
2613
0
        return cumwmsedec;
2614
0
    }
2615
2616
0
    bpno = (OPJ_INT32)(cblk->numbps - 1);
2617
0
    passtype = 2;
2618
2619
0
    opj_mqc_resetstates(mqc);
2620
0
    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2621
0
    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2622
0
    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2623
0
    opj_mqc_init_enc(mqc, cblk->data);
2624
2625
0
    for (passno = 0; bpno >= 0; ++passno) {
2626
0
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2627
0
        type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2628
0
                (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2629
2630
        /* If the previous pass was terminating, we need to reset the encoder */
2631
0
        if (passno > 0 && cblk->passes[passno - 1].term) {
2632
0
            if (type == T1_TYPE_RAW) {
2633
0
                opj_mqc_bypass_init_enc(mqc);
2634
0
            } else {
2635
0
                opj_mqc_restart_init_enc(mqc);
2636
0
            }
2637
0
        }
2638
2639
0
        switch (passtype) {
2640
0
        case 0:
2641
0
            opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2642
0
            break;
2643
0
        case 1:
2644
0
            opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2645
0
            break;
2646
0
        case 2:
2647
0
            opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2648
            /* code switch SEGMARK (i.e. SEGSYM) */
2649
0
            if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2650
0
                opj_mqc_segmark_enc(mqc);
2651
0
            }
2652
0
            break;
2653
0
        }
2654
2655
0
        tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2656
0
                                        stepsize, numcomps, mct_norms, mct_numcomps) ;
2657
0
        cumwmsedec += tempwmsedec;
2658
0
        pass->distortiondec = cumwmsedec;
2659
2660
0
        if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2661
            /* If it is a terminated pass, terminate it */
2662
0
            if (type == T1_TYPE_RAW) {
2663
0
                opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2664
0
            } else {
2665
0
                if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2666
0
                    opj_mqc_erterm_enc(mqc);
2667
0
                } else {
2668
0
                    opj_mqc_flush(mqc);
2669
0
                }
2670
0
            }
2671
0
            pass->term = 1;
2672
0
            pass->rate = opj_mqc_numbytes(mqc);
2673
0
        } else {
2674
            /* Non terminated pass */
2675
0
            OPJ_UINT32 rate_extra_bytes;
2676
0
            if (type == T1_TYPE_RAW) {
2677
0
                rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2678
0
                                       mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2679
0
            } else {
2680
0
                rate_extra_bytes = 3;
2681
0
            }
2682
0
            pass->term = 0;
2683
0
            pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2684
0
        }
2685
2686
0
        if (++passtype == 3) {
2687
0
            passtype = 0;
2688
0
            bpno--;
2689
0
        }
2690
2691
        /* Code-switch "RESET" */
2692
0
        if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2693
0
            opj_mqc_reset_enc(mqc);
2694
0
        }
2695
0
    }
2696
2697
0
    cblk->totalpasses = passno;
2698
2699
0
    if (cblk->totalpasses) {
2700
        /* Make sure that pass rates are increasing */
2701
0
        OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2702
0
        for (passno = cblk->totalpasses; passno > 0;) {
2703
0
            opj_tcd_pass_t *pass = &cblk->passes[--passno];
2704
0
            if (pass->rate > last_pass_rate) {
2705
0
                pass->rate = last_pass_rate;
2706
0
            } else {
2707
0
                last_pass_rate = pass->rate;
2708
0
            }
2709
0
        }
2710
0
    }
2711
2712
0
    for (passno = 0; passno < cblk->totalpasses; passno++) {
2713
0
        opj_tcd_pass_t *pass = &cblk->passes[passno];
2714
2715
        /* Prevent generation of FF as last data byte of a pass*/
2716
        /* For terminating passes, the flushing procedure ensured this already */
2717
0
        assert(pass->rate > 0);
2718
0
        if (cblk->data[pass->rate - 1] == 0xFF) {
2719
0
            pass->rate--;
2720
0
        }
2721
0
        pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2722
0
    }
2723
2724
#ifdef EXTRA_DEBUG
2725
    printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2726
2727
    /* Check that there not 0xff >=0x90 sequences */
2728
    if (cblk->totalpasses) {
2729
        OPJ_UINT32 i;
2730
        OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2731
        for (i = 1; i < len; ++i) {
2732
            if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2733
                printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2734
                abort();
2735
            }
2736
        }
2737
    }
2738
#endif
2739
2740
0
    return cumwmsedec;
2741
0
}