Coverage Report

Created: 2023-12-08 06:53

/src/freeimage-svn/FreeImage/trunk/Source/LibOpenJPEG/dwt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium
3
 * Copyright (c) 2002-2007, Professor Benoit Macq
4
 * Copyright (c) 2001-2003, David Janssens
5
 * Copyright (c) 2002-2003, Yannick Verschueren
6
 * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe
7
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
8
 * Copyright (c) 2007, Jonathan Ballard <dzonatas@dzonux.net>
9
 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
10
 * All rights reserved.
11
 *
12
 * Redistribution and use in source and binary forms, with or without
13
 * modification, are permitted provided that the following conditions
14
 * are met:
15
 * 1. Redistributions of source code must retain the above copyright
16
 *    notice, this list of conditions and the following disclaimer.
17
 * 2. Redistributions in binary form must reproduce the above copyright
18
 *    notice, this list of conditions and the following disclaimer in the
19
 *    documentation and/or other materials provided with the distribution.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
22
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
 * POSSIBILITY OF SUCH DAMAGE.
32
 */
33
34
#ifdef __SSE__
35
#include <xmmintrin.h>
36
#endif
37
38
#include "opj_includes.h"
39
40
/** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
41
/*@{*/
42
43
#define OPJ_WS(i) v->mem[(i)*2]
44
#define OPJ_WD(i) v->mem[(1+(i)*2)]
45
46
/** @name Local data structures */
47
/*@{*/
48
49
typedef struct dwt_local {
50
  OPJ_INT32* mem;
51
  OPJ_INT32 dn;
52
  OPJ_INT32 sn;
53
  OPJ_INT32 cas;
54
} opj_dwt_t;
55
56
typedef union {
57
  OPJ_FLOAT32 f[4];
58
} opj_v4_t;
59
60
typedef struct v4dwt_local {
61
  opj_v4_t* wavelet ;
62
  OPJ_INT32   dn ;
63
  OPJ_INT32   sn ;
64
  OPJ_INT32   cas ;
65
} opj_v4dwt_t ;
66
67
static const OPJ_FLOAT32 opj_dwt_alpha =  1.586134342f; /*  12994 */
68
static const OPJ_FLOAT32 opj_dwt_beta  =  0.052980118f; /*    434 */
69
static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /*  -7233 */
70
static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /*  -3633 */
71
72
static const OPJ_FLOAT32 opj_K      = 1.230174105f; /*  10078 */
73
static const OPJ_FLOAT32 opj_c13318 = 1.625732422f;
74
75
/*@}*/
76
77
/**
78
Virtual function type for wavelet transform in 1-D 
79
*/
80
typedef void (*DWT1DFN)(opj_dwt_t* v);
81
82
/** @name Local static functions */
83
/*@{*/
84
85
/**
86
Forward lazy transform (horizontal)
87
*/
88
static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
89
/**
90
Forward lazy transform (vertical)
91
*/
92
static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas);
93
/**
94
Inverse lazy transform (horizontal)
95
*/
96
static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a);
97
/**
98
Inverse lazy transform (vertical)
99
*/
100
static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x);
101
/**
102
Forward 5-3 wavelet transform in 1-D
103
*/
104
static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
105
/**
106
Inverse 5-3 wavelet transform in 1-D
107
*/
108
static void opj_dwt_decode_1(opj_dwt_t *v);
109
static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
110
/**
111
Forward 9-7 wavelet transform in 1-D
112
*/
113
static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
114
/**
115
Explicit calculation of the Quantization Stepsizes 
116
*/
117
static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize);
118
/**
119
Inverse wavelet transform in 2-D.
120
*/
121
static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn);
122
123
static OPJ_BOOL opj_dwt_encode_procedure( opj_tcd_tilecomp_t * tilec,
124
                        void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) );
125
126
static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i);
127
128
/* <summary>                             */
129
/* Inverse 9-7 wavelet transform in 1-D. */
130
/* </summary>                            */
131
static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt);
132
133
static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size);
134
135
static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read);
136
137
#ifdef __SSE__
138
static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c);
139
140
static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c);
141
142
#else
143
static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c);
144
145
static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c);
146
147
#endif
148
149
/*@}*/
150
151
/*@}*/
152
153
0
#define OPJ_S(i) a[(i)*2]
154
0
#define OPJ_D(i) a[(1+(i)*2)]
155
0
#define OPJ_S_(i) ((i)<0?OPJ_S(0):((i)>=sn?OPJ_S(sn-1):OPJ_S(i)))
156
0
#define OPJ_D_(i) ((i)<0?OPJ_D(0):((i)>=dn?OPJ_D(dn-1):OPJ_D(i)))
157
/* new */
158
0
#define OPJ_SS_(i) ((i)<0?OPJ_S(0):((i)>=dn?OPJ_S(dn-1):OPJ_S(i)))
159
0
#define OPJ_DD_(i) ((i)<0?OPJ_D(0):((i)>=sn?OPJ_D(sn-1):OPJ_D(i)))
160
161
/* <summary>                                                              */
162
/* This table contains the norms of the 5-3 wavelets for different bands. */
163
/* </summary>                                                             */
164
static const OPJ_FLOAT64 opj_dwt_norms[4][10] = {
165
  {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3},
166
  {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
167
  {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
168
  {.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93}
169
};
170
171
/* <summary>                                                              */
172
/* This table contains the norms of the 9-7 wavelets for different bands. */
173
/* </summary>                                                             */
174
static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = {
175
  {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9},
176
  {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
177
  {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
178
  {2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2}
179
};
180
181
/* 
182
==========================================================
183
   local functions
184
==========================================================
185
*/
186
187
/* <summary>                       */
188
/* Forward lazy transform (horizontal).  */
189
/* </summary>                            */ 
190
0
void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
191
0
  OPJ_INT32 i;
192
0
  OPJ_INT32 * l_dest = b;
193
0
  OPJ_INT32 * l_src = a+cas;
194
195
0
    for (i=0; i<sn; ++i) {
196
0
    *l_dest++ = *l_src;
197
0
    l_src += 2;
198
0
  }
199
  
200
0
    l_dest = b + sn;
201
0
  l_src = a + 1 - cas;
202
203
0
    for (i=0; i<dn; ++i)  {
204
0
    *l_dest++=*l_src;
205
0
    l_src += 2;
206
0
  }
207
0
}
208
209
/* <summary>                             */  
210
/* Forward lazy transform (vertical).    */
211
/* </summary>                            */ 
212
0
void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) {
213
0
    OPJ_INT32 i = sn;
214
0
  OPJ_INT32 * l_dest = b;
215
0
  OPJ_INT32 * l_src = a+cas;
216
217
0
    while (i--) {
218
0
    *l_dest = *l_src;
219
0
    l_dest += x;
220
0
    l_src += 2;
221
0
    } /* b[i*x]=a[2*i+cas]; */
222
223
0
  l_dest = b + sn * x;
224
0
  l_src = a + 1 - cas;
225
  
226
0
  i = dn;
227
0
    while (i--) {
228
0
    *l_dest = *l_src;
229
0
    l_dest += x;
230
0
    l_src += 2;
231
0
        } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/
232
0
}
233
234
/* <summary>                             */
235
/* Inverse lazy transform (horizontal).  */
236
/* </summary>                            */
237
0
void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a) {
238
0
    OPJ_INT32 *ai = a;
239
0
    OPJ_INT32 *bi = h->mem + h->cas;
240
0
    OPJ_INT32  i  = h->sn;
241
0
    while( i-- ) {
242
0
      *bi = *(ai++);
243
0
    bi += 2;
244
0
    }
245
0
    ai  = a + h->sn;
246
0
    bi  = h->mem + 1 - h->cas;
247
0
    i = h->dn ;
248
0
    while( i-- ) {
249
0
      *bi = *(ai++);
250
0
    bi += 2;
251
0
    }
252
0
}
253
254
/* <summary>                             */  
255
/* Inverse lazy transform (vertical).    */
256
/* </summary>                            */ 
257
0
void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) {
258
0
    OPJ_INT32 *ai = a;
259
0
    OPJ_INT32 *bi = v->mem + v->cas;
260
0
    OPJ_INT32  i = v->sn;
261
0
    while( i-- ) {
262
0
      *bi = *ai;
263
0
    bi += 2;
264
0
    ai += x;
265
0
    }
266
0
    ai = a + (v->sn * x);
267
0
    bi = v->mem + 1 - v->cas;
268
0
    i = v->dn ;
269
0
    while( i-- ) {
270
0
      *bi = *ai;
271
0
    bi += 2;  
272
0
    ai += x;
273
0
    }
274
0
}
275
276
277
/* <summary>                            */
278
/* Forward 5-3 wavelet transform in 1-D. */
279
/* </summary>                           */
280
0
void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
281
0
  OPJ_INT32 i;
282
  
283
0
  if (!cas) {
284
0
    if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
285
0
      for (i = 0; i < dn; i++) OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
286
0
      for (i = 0; i < sn; i++) OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
287
0
    }
288
0
  } else {
289
0
    if (!sn && dn == 1)       /* NEW :  CASE ONE ELEMENT */
290
0
      OPJ_S(0) *= 2;
291
0
    else {
292
0
      for (i = 0; i < dn; i++) OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
293
0
      for (i = 0; i < sn; i++) OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
294
0
    }
295
0
  }
296
0
}
297
298
/* <summary>                            */
299
/* Inverse 5-3 wavelet transform in 1-D. */
300
/* </summary>                           */ 
301
0
void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
302
0
  OPJ_INT32 i;
303
  
304
0
  if (!cas) {
305
0
    if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
306
0
      for (i = 0; i < sn; i++) OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
307
0
      for (i = 0; i < dn; i++) OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
308
0
    }
309
0
  } else {
310
0
    if (!sn  && dn == 1)          /* NEW :  CASE ONE ELEMENT */
311
0
      OPJ_S(0) /= 2;
312
0
    else {
313
0
      for (i = 0; i < sn; i++) OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
314
0
      for (i = 0; i < dn; i++) OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
315
0
    }
316
0
  }
317
0
}
318
319
/* <summary>                            */
320
/* Inverse 5-3 wavelet transform in 1-D. */
321
/* </summary>                           */ 
322
0
void opj_dwt_decode_1(opj_dwt_t *v) {
323
0
  opj_dwt_decode_1_(v->mem, v->dn, v->sn, v->cas);
324
0
}
325
326
/* <summary>                             */
327
/* Forward 9-7 wavelet transform in 1-D. */
328
/* </summary>                            */
329
0
void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
330
0
  OPJ_INT32 i;
331
0
  if (!cas) {
332
0
    if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
333
0
      for (i = 0; i < dn; i++)
334
0
        OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993);
335
0
      for (i = 0; i < sn; i++)
336
0
        OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434);
337
0
      for (i = 0; i < dn; i++)
338
0
        OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233);
339
0
      for (i = 0; i < sn; i++)
340
0
        OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633);
341
0
      for (i = 0; i < dn; i++)
342
0
        OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */
343
0
      for (i = 0; i < sn; i++)
344
0
        OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */
345
0
    }
346
0
  } else {
347
0
    if ((sn > 0) || (dn > 1)) { /* NEW :  CASE ONE ELEMENT */
348
0
      for (i = 0; i < dn; i++)
349
0
        OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993);
350
0
      for (i = 0; i < sn; i++)
351
0
        OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434);
352
0
      for (i = 0; i < dn; i++)
353
0
        OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233);
354
0
      for (i = 0; i < sn; i++)
355
0
        OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633);
356
0
      for (i = 0; i < dn; i++)
357
0
        OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */
358
0
      for (i = 0; i < sn; i++)
359
0
        OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */
360
0
    }
361
0
  }
362
0
}
363
364
0
void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize) {
365
0
  OPJ_INT32 p, n;
366
0
  p = opj_int_floorlog2(stepsize) - 13;
367
0
  n = 11 - opj_int_floorlog2(stepsize);
368
0
  bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff;
369
0
  bandno_stepsize->expn = numbps - p;
370
0
}
371
372
/* 
373
==========================================================
374
   DWT interface
375
==========================================================
376
*/
377
378
379
/* <summary>                            */
380
/* Forward 5-3 wavelet transform in 2-D. */
381
/* </summary>                           */
382
INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) )
383
0
{
384
0
  OPJ_INT32 i, j, k;
385
0
  OPJ_INT32 *a = 00;
386
0
  OPJ_INT32 *aj = 00;
387
0
  OPJ_INT32 *bj = 00;
388
0
  OPJ_INT32 w, l;
389
390
0
  OPJ_INT32 rw;     /* width of the resolution level computed   */
391
0
  OPJ_INT32 rh;     /* height of the resolution level computed  */
392
0
  OPJ_UINT32 l_data_size;
393
394
0
  opj_tcd_resolution_t * l_cur_res = 0;
395
0
  opj_tcd_resolution_t * l_last_res = 0;
396
397
0
  w = tilec->x1-tilec->x0;
398
0
  l = (OPJ_INT32)tilec->numresolutions-1;
399
0
  a = tilec->data;
400
401
0
  l_cur_res = tilec->resolutions + l;
402
0
  l_last_res = l_cur_res - 1;
403
404
0
  l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32);
405
0
  bj = (OPJ_INT32*)opj_malloc((size_t)l_data_size);
406
0
  if (! bj) {
407
0
    return OPJ_FALSE;
408
0
  }
409
0
  i = l;
410
411
0
  while (i--) {
412
0
    OPJ_INT32 rw1;    /* width of the resolution level once lower than computed one                                       */
413
0
    OPJ_INT32 rh1;    /* height of the resolution level once lower than computed one                                      */
414
0
    OPJ_INT32 cas_col;  /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */
415
0
    OPJ_INT32 cas_row;  /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering   */
416
0
    OPJ_INT32 dn, sn;
417
418
0
    rw  = l_cur_res->x1 - l_cur_res->x0;
419
0
    rh  = l_cur_res->y1 - l_cur_res->y0;
420
0
    rw1 = l_last_res->x1 - l_last_res->x0;
421
0
    rh1 = l_last_res->y1 - l_last_res->y0;
422
423
0
    cas_row = l_cur_res->x0 & 1;
424
0
    cas_col = l_cur_res->y0 & 1;
425
426
0
    sn = rh1;
427
0
    dn = rh - rh1;
428
0
    for (j = 0; j < rw; ++j) {
429
0
      aj = a + j;
430
0
      for (k = 0; k < rh; ++k) {
431
0
        bj[k] = aj[k*w];
432
0
      }
433
434
0
      (*p_function) (bj, dn, sn, cas_col);
435
436
0
      opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
437
0
    }
438
439
0
    sn = rw1;
440
0
    dn = rw - rw1;
441
442
0
    for (j = 0; j < rh; j++) {
443
0
      aj = a + j * w;
444
0
      for (k = 0; k < rw; k++)  bj[k] = aj[k];
445
0
      (*p_function) (bj, dn, sn, cas_row);
446
0
      opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
447
0
    }
448
449
0
    l_cur_res = l_last_res;
450
451
0
    --l_last_res;
452
0
  }
453
454
0
  opj_free(bj);
455
0
  return OPJ_TRUE;
456
0
}
457
458
/* Forward 5-3 wavelet transform in 2-D. */
459
/* </summary>                           */
460
OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec)
461
0
{
462
0
  return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1);
463
0
}
464
465
/* <summary>                            */
466
/* Inverse 5-3 wavelet transform in 2-D. */
467
/* </summary>                           */
468
0
OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) {
469
0
  return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1);
470
0
}
471
472
473
/* <summary>                          */
474
/* Get gain of 5-3 wavelet transform. */
475
/* </summary>                         */
476
0
OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) {
477
0
  if (orient == 0)
478
0
    return 0;
479
0
  if (orient == 1 || orient == 2)
480
0
    return 1;
481
0
  return 2;
482
0
}
483
484
/* <summary>                */
485
/* Get norm of 5-3 wavelet. */
486
/* </summary>               */
487
0
OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) {
488
0
  return opj_dwt_norms[orient][level];
489
0
}
490
491
/* <summary>                             */
492
/* Forward 9-7 wavelet transform in 2-D. */
493
/* </summary>                            */
494
OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec)
495
0
{
496
0
  return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1_real);
497
0
}
498
499
/* <summary>                          */
500
/* Get gain of 9-7 wavelet transform. */
501
/* </summary>                         */
502
0
OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) {
503
0
  (void)orient;
504
0
  return 0;
505
0
}
506
507
/* <summary>                */
508
/* Get norm of 9-7 wavelet. */
509
/* </summary>               */
510
0
OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) {
511
0
  return opj_dwt_norms_real[orient][level];
512
0
}
513
514
0
void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) {
515
0
  OPJ_UINT32 numbands, bandno;
516
0
  numbands = 3 * tccp->numresolutions - 2;
517
0
  for (bandno = 0; bandno < numbands; bandno++) {
518
0
    OPJ_FLOAT64 stepsize;
519
0
    OPJ_UINT32 resno, level, orient, gain;
520
521
0
    resno = (bandno == 0) ? 0 : ((bandno - 1) / 3 + 1);
522
0
    orient = (bandno == 0) ? 0 : ((bandno - 1) % 3 + 1);
523
0
    level = tccp->numresolutions - 1 - resno;
524
0
    gain = (tccp->qmfbid == 0) ? 0 : ((orient == 0) ? 0 : (((orient == 1) || (orient == 2)) ? 1 : 2));
525
0
    if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) {
526
0
      stepsize = 1.0;
527
0
    } else {
528
0
      OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level];
529
0
      stepsize = (1 << (gain)) / norm;
530
0
    }
531
0
    opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]);
532
0
  }
533
0
}
534
535
/* <summary>                             */
536
/* Determine maximum computed resolution level for inverse wavelet transform */
537
/* </summary>                            */
538
0
OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) {
539
0
  OPJ_UINT32 mr = 0;
540
0
  OPJ_UINT32 w;
541
0
  while( --i ) {
542
0
    ++r;
543
0
    if( mr < ( w = (OPJ_UINT32)(r->x1 - r->x0) ) )
544
0
      mr = w ;
545
0
    if( mr < ( w = (OPJ_UINT32)(r->y1 - r->y0) ) )
546
0
      mr = w ;
547
0
  }
548
0
  return mr ;
549
0
}
550
551
/* <summary>                            */
552
/* Inverse wavelet transform in 2-D.     */
553
/* </summary>                           */
554
0
OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) {
555
0
  opj_dwt_t h;
556
0
  opj_dwt_t v;
557
558
0
  opj_tcd_resolution_t* tr = tilec->resolutions;
559
560
0
  OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0);  /* width of the resolution level computed */
561
0
  OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0);  /* height of the resolution level computed */
562
563
0
  OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
564
565
0
  h.mem = (OPJ_INT32*)
566
0
  opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
567
0
  if (! h.mem){
568
0
    return OPJ_FALSE;
569
0
  }
570
571
0
  v.mem = h.mem;
572
573
0
  while( --numres) {
574
0
    OPJ_INT32 * restrict tiledp = tilec->data;
575
0
    OPJ_UINT32 j;
576
577
0
    ++tr;
578
0
    h.sn = (OPJ_INT32)rw;
579
0
    v.sn = (OPJ_INT32)rh;
580
581
0
    rw = (OPJ_UINT32)(tr->x1 - tr->x0);
582
0
    rh = (OPJ_UINT32)(tr->y1 - tr->y0);
583
584
0
    h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
585
0
    h.cas = tr->x0 % 2;
586
587
0
    for(j = 0; j < rh; ++j) {
588
0
      opj_dwt_interleave_h(&h, &tiledp[j*w]);
589
0
      (dwt_1D)(&h);
590
0
      memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32));
591
0
    }
592
593
0
    v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
594
0
    v.cas = tr->y0 % 2;
595
596
0
    for(j = 0; j < rw; ++j){
597
0
      OPJ_UINT32 k;
598
0
      opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w);
599
0
      (dwt_1D)(&v);
600
0
      for(k = 0; k < rh; ++k) {
601
0
        tiledp[k * w + j] = v.mem[k];
602
0
      }
603
0
    }
604
0
  }
605
0
  opj_aligned_free(h.mem);
606
0
  return OPJ_TRUE;
607
0
}
608
609
0
void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){
610
0
  OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas);
611
0
  OPJ_INT32 count = w->sn;
612
0
  OPJ_INT32 i, k;
613
614
0
  for(k = 0; k < 2; ++k){
615
0
    if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) {
616
      /* Fast code path */
617
0
      for(i = 0; i < count; ++i){
618
0
        OPJ_INT32 j = i;
619
0
        bi[i*8    ] = a[j];
620
0
        j += x;
621
0
        bi[i*8 + 1] = a[j];
622
0
        j += x;
623
0
        bi[i*8 + 2] = a[j];
624
0
        j += x;
625
0
        bi[i*8 + 3] = a[j];
626
0
      }
627
0
    }
628
0
    else {
629
      /* Slow code path */
630
0
      for(i = 0; i < count; ++i){
631
0
        OPJ_INT32 j = i;
632
0
        bi[i*8    ] = a[j];
633
0
        j += x;
634
0
        if(j >= size) continue;
635
0
        bi[i*8 + 1] = a[j];
636
0
        j += x;
637
0
        if(j >= size) continue;
638
0
        bi[i*8 + 2] = a[j];
639
0
        j += x;
640
0
        if(j >= size) continue;
641
0
        bi[i*8 + 3] = a[j]; /* This one*/
642
0
      }
643
0
    }
644
645
0
    bi = (OPJ_FLOAT32*) (w->wavelet + 1 - w->cas);
646
0
    a += w->sn;
647
0
    size -= w->sn;
648
0
    count = w->dn;
649
0
  }
650
0
}
651
652
0
void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){
653
0
  opj_v4_t* restrict bi = v->wavelet + v->cas;
654
0
  OPJ_INT32 i;
655
656
0
  for(i = 0; i < v->sn; ++i){
657
0
    memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
658
0
  }
659
660
0
  a += v->sn * x;
661
0
  bi = v->wavelet + 1 - v->cas;
662
663
0
  for(i = 0; i < v->dn; ++i){
664
0
    memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
665
0
  }
666
0
}
667
668
#ifdef __SSE__
669
670
0
void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){
671
0
  __m128* restrict vw = (__m128*) w;
672
0
  OPJ_INT32 i;
673
  /* 4x unrolled loop */
674
0
  for(i = 0; i < count >> 2; ++i){
675
0
    *vw = _mm_mul_ps(*vw, c);
676
0
    vw += 2;
677
0
    *vw = _mm_mul_ps(*vw, c);
678
0
    vw += 2;
679
0
    *vw = _mm_mul_ps(*vw, c);
680
0
    vw += 2;
681
0
    *vw = _mm_mul_ps(*vw, c);
682
0
    vw += 2;
683
0
  }
684
0
  count &= 3;
685
0
  for(i = 0; i < count; ++i){
686
0
    *vw = _mm_mul_ps(*vw, c);
687
0
    vw += 2;
688
0
  }
689
0
}
690
691
0
void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){
692
0
  __m128* restrict vl = (__m128*) l;
693
0
  __m128* restrict vw = (__m128*) w;
694
0
  OPJ_INT32 i;
695
0
  __m128 tmp1, tmp2, tmp3;
696
0
  tmp1 = vl[0];
697
0
  for(i = 0; i < m; ++i){
698
0
    tmp2 = vw[-1];
699
0
    tmp3 = vw[ 0];
700
0
    vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
701
0
    tmp1 = tmp3;
702
0
    vw += 2;
703
0
  }
704
0
  vl = vw - 2;
705
0
  if(m >= k){
706
0
    return;
707
0
  }
708
0
  c = _mm_add_ps(c, c);
709
0
  c = _mm_mul_ps(c, vl[0]);
710
0
  for(; m < k; ++m){
711
0
    __m128 tmp = vw[-1];
712
0
    vw[-1] = _mm_add_ps(tmp, c);
713
0
    vw += 2;
714
0
  }
715
0
}
716
717
#else
718
719
void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c)
720
{
721
  OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
722
  OPJ_INT32 i;
723
  for(i = 0; i < count; ++i){
724
    OPJ_FLOAT32 tmp1 = fw[i*8    ];
725
    OPJ_FLOAT32 tmp2 = fw[i*8 + 1];
726
    OPJ_FLOAT32 tmp3 = fw[i*8 + 2];
727
    OPJ_FLOAT32 tmp4 = fw[i*8 + 3];
728
    fw[i*8    ] = tmp1 * c;
729
    fw[i*8 + 1] = tmp2 * c;
730
    fw[i*8 + 2] = tmp3 * c;
731
    fw[i*8 + 3] = tmp4 * c;
732
  }
733
}
734
735
void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c)
736
{
737
  OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l;
738
  OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
739
  OPJ_INT32 i;
740
  for(i = 0; i < m; ++i){
741
    OPJ_FLOAT32 tmp1_1 = fl[0];
742
    OPJ_FLOAT32 tmp1_2 = fl[1];
743
    OPJ_FLOAT32 tmp1_3 = fl[2];
744
    OPJ_FLOAT32 tmp1_4 = fl[3];
745
    OPJ_FLOAT32 tmp2_1 = fw[-4];
746
    OPJ_FLOAT32 tmp2_2 = fw[-3];
747
    OPJ_FLOAT32 tmp2_3 = fw[-2];
748
    OPJ_FLOAT32 tmp2_4 = fw[-1];
749
    OPJ_FLOAT32 tmp3_1 = fw[0];
750
    OPJ_FLOAT32 tmp3_2 = fw[1];
751
    OPJ_FLOAT32 tmp3_3 = fw[2];
752
    OPJ_FLOAT32 tmp3_4 = fw[3];
753
    fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c);
754
    fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c);
755
    fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c);
756
    fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c);
757
    fl = fw;
758
    fw += 8;
759
  }
760
  if(m < k){
761
    OPJ_FLOAT32 c1;
762
    OPJ_FLOAT32 c2;
763
    OPJ_FLOAT32 c3;
764
    OPJ_FLOAT32 c4;
765
    c += c;
766
    c1 = fl[0] * c;
767
    c2 = fl[1] * c;
768
    c3 = fl[2] * c;
769
    c4 = fl[3] * c;
770
    for(; m < k; ++m){
771
      OPJ_FLOAT32 tmp1 = fw[-4];
772
      OPJ_FLOAT32 tmp2 = fw[-3];
773
      OPJ_FLOAT32 tmp3 = fw[-2];
774
      OPJ_FLOAT32 tmp4 = fw[-1];
775
      fw[-4] = tmp1 + c1;
776
      fw[-3] = tmp2 + c2;
777
      fw[-2] = tmp3 + c3;
778
      fw[-1] = tmp4 + c4;
779
      fw += 8;
780
    }
781
  }
782
}
783
784
#endif
785
786
/* <summary>                             */
787
/* Inverse 9-7 wavelet transform in 1-D. */
788
/* </summary>                            */
789
void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt)
790
0
{
791
0
  OPJ_INT32 a, b;
792
0
  if(dwt->cas == 0) {
793
0
    if(!((dwt->dn > 0) || (dwt->sn > 1))){
794
0
      return;
795
0
    }
796
0
    a = 0;
797
0
    b = 1;
798
0
  }else{
799
0
    if(!((dwt->sn > 0) || (dwt->dn > 1))) {
800
0
      return;
801
0
    }
802
0
    a = 1;
803
0
    b = 0;
804
0
  }
805
0
#ifdef __SSE__
806
0
  opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
807
0
  opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318));
808
0
  opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
809
0
  opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
810
0
  opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
811
0
  opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
812
#else
813
  opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
814
  opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
815
  opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
816
  opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma);
817
  opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_beta);
818
  opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha);
819
#endif
820
0
}
821
822
823
/* <summary>                             */
824
/* Inverse 9-7 wavelet transform in 2-D. */
825
/* </summary>                            */
826
OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres)
827
0
{
828
0
  opj_v4dwt_t h;
829
0
  opj_v4dwt_t v;
830
831
0
  opj_tcd_resolution_t* res = tilec->resolutions;
832
833
0
  OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 - res->x0);  /* width of the resolution level computed */
834
0
  OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0);  /* height of the resolution level computed */
835
836
0
  OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
837
838
0
  h.wavelet = (opj_v4_t*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(opj_v4_t));
839
0
  v.wavelet = h.wavelet;
840
841
0
  while( --numres) {
842
0
    OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data;
843
0
    OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0));
844
0
    OPJ_INT32 j;
845
846
0
    h.sn = (OPJ_INT32)rw;
847
0
    v.sn = (OPJ_INT32)rh;
848
849
0
    ++res;
850
851
0
    rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */
852
0
    rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */
853
854
0
    h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
855
0
    h.cas = res->x0 % 2;
856
857
0
    for(j = (OPJ_INT32)rh; j > 3; j -= 4) {
858
0
      OPJ_INT32 k;
859
0
      opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize);
860
0
      opj_v4dwt_decode(&h);
861
862
0
      for(k = (OPJ_INT32)rw; --k >= 0;){
863
0
        aj[k               ] = h.wavelet[k].f[0];
864
0
        aj[k+(OPJ_INT32)w  ] = h.wavelet[k].f[1];
865
0
        aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2];
866
0
        aj[k+(OPJ_INT32)w*3] = h.wavelet[k].f[3];
867
0
      }
868
869
0
      aj += w*4;
870
0
      bufsize -= w*4;
871
0
    }
872
873
0
    if (rh & 0x03) {
874
0
      OPJ_INT32 k;
875
0
      j = rh & 0x03;
876
0
      opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize);
877
0
      opj_v4dwt_decode(&h);
878
0
      for(k = (OPJ_INT32)rw; --k >= 0;){
879
0
        switch(j) {
880
0
          case 3: aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2];
881
0
          case 2: aj[k+(OPJ_INT32)w  ] = h.wavelet[k].f[1];
882
0
          case 1: aj[k               ] = h.wavelet[k].f[0];
883
0
        }
884
0
      }
885
0
    }
886
887
0
    v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
888
0
    v.cas = res->y0 % 2;
889
890
0
    aj = (OPJ_FLOAT32*) tilec->data;
891
0
    for(j = (OPJ_INT32)rw; j > 3; j -= 4){
892
0
      OPJ_UINT32 k;
893
894
0
      opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, 4);
895
0
      opj_v4dwt_decode(&v);
896
897
0
      for(k = 0; k < rh; ++k){
898
0
        memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
899
0
      }
900
0
      aj += 4;
901
0
    }
902
903
0
    if (rw & 0x03){
904
0
      OPJ_UINT32 k;
905
906
0
      j = rw & 0x03;
907
908
0
      opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, j);
909
0
      opj_v4dwt_decode(&v);
910
911
0
      for(k = 0; k < rh; ++k){
912
0
        memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
913
0
      }
914
0
    }
915
0
  }
916
917
0
  opj_aligned_free(h.wavelet);
918
0
  return OPJ_TRUE;
919
0
}