/src/freeimage-svn/FreeImage/trunk/Source/LibOpenJPEG/mct.c
Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium  | 
3  |  |  * Copyright (c) 2002-2007, Professor Benoit Macq  | 
4  |  |  * Copyright (c) 2001-2003, David Janssens  | 
5  |  |  * Copyright (c) 2002-2003, Yannick Verschueren  | 
6  |  |  * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe  | 
7  |  |  * Copyright (c) 2005, Herve Drolon, FreeImage Team  | 
8  |  |  * Copyright (c) 2008;2011-2012, Centre National d'Etudes Spatiales (CNES), France   | 
9  |  |  * Copyright (c) 2012, CS Systemes d'Information, France  | 
10  |  |  * All rights reserved.  | 
11  |  |  *  | 
12  |  |  * Redistribution and use in source and binary forms, with or without  | 
13  |  |  * modification, are permitted provided that the following conditions  | 
14  |  |  * are met:  | 
15  |  |  * 1. Redistributions of source code must retain the above copyright  | 
16  |  |  *    notice, this list of conditions and the following disclaimer.  | 
17  |  |  * 2. Redistributions in binary form must reproduce the above copyright  | 
18  |  |  *    notice, this list of conditions and the following disclaimer in the  | 
19  |  |  *    documentation and/or other materials provided with the distribution.  | 
20  |  |  *  | 
21  |  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'  | 
22  |  |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
23  |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
24  |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE  | 
25  |  |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR  | 
26  |  |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF  | 
27  |  |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  | 
28  |  |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN  | 
29  |  |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)  | 
30  |  |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  | 
31  |  |  * POSSIBILITY OF SUCH DAMAGE.  | 
32  |  |  */  | 
33  |  |  | 
34  |  | #ifdef __SSE__  | 
35  |  | #include <xmmintrin.h>  | 
36  |  | #endif  | 
37  |  |  | 
38  |  | #include "opj_includes.h"  | 
39  |  |  | 
40  |  | /* <summary> */  | 
41  |  | /* This table contains the norms of the basis function of the reversible MCT. */  | 
42  |  | /* </summary> */  | 
43  |  | static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; | 
44  |  |  | 
45  |  | /* <summary> */  | 
46  |  | /* This table contains the norms of the basis function of the irreversible MCT. */  | 
47  |  | /* </summary> */  | 
48  |  | static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; | 
49  |  |  | 
50  |  | const OPJ_FLOAT64 * opj_mct_get_mct_norms ()  | 
51  | 0  | { | 
52  | 0  |   return opj_mct_norms;  | 
53  | 0  | }  | 
54  |  |  | 
55  |  | const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()  | 
56  | 0  | { | 
57  | 0  |   return opj_mct_norms_real;  | 
58  | 0  | }  | 
59  |  |  | 
60  |  | /* <summary> */  | 
61  |  | /* Foward reversible MCT. */  | 
62  |  | /* </summary> */  | 
63  |  | void opj_mct_encode(  | 
64  |  |     OPJ_INT32* restrict c0,  | 
65  |  |     OPJ_INT32* restrict c1,  | 
66  |  |     OPJ_INT32* restrict c2,  | 
67  |  |     OPJ_UINT32 n)  | 
68  | 0  | { | 
69  | 0  |   OPJ_UINT32 i;  | 
70  | 0  |   for(i = 0; i < n; ++i) { | 
71  | 0  |     OPJ_INT32 r = c0[i];  | 
72  | 0  |     OPJ_INT32 g = c1[i];  | 
73  | 0  |     OPJ_INT32 b = c2[i];  | 
74  | 0  |     OPJ_INT32 y = (r + (g * 2) + b) >> 2;  | 
75  | 0  |     OPJ_INT32 u = b - g;  | 
76  | 0  |     OPJ_INT32 v = r - g;  | 
77  | 0  |     c0[i] = y;  | 
78  | 0  |     c1[i] = u;  | 
79  | 0  |     c2[i] = v;  | 
80  | 0  |   }  | 
81  | 0  | }  | 
82  |  |  | 
83  |  | /* <summary> */  | 
84  |  | /* Inverse reversible MCT. */  | 
85  |  | /* </summary> */  | 
86  |  | void opj_mct_decode(  | 
87  |  |     OPJ_INT32* restrict c0,  | 
88  |  |     OPJ_INT32* restrict c1,   | 
89  |  |     OPJ_INT32* restrict c2,   | 
90  |  |     OPJ_UINT32 n)  | 
91  | 0  | { | 
92  | 0  |   OPJ_UINT32 i;  | 
93  | 0  |   for (i = 0; i < n; ++i) { | 
94  | 0  |     OPJ_INT32 y = c0[i];  | 
95  | 0  |     OPJ_INT32 u = c1[i];  | 
96  | 0  |     OPJ_INT32 v = c2[i];  | 
97  | 0  |     OPJ_INT32 g = y - ((u + v) >> 2);  | 
98  | 0  |     OPJ_INT32 r = v + g;  | 
99  | 0  |     OPJ_INT32 b = u + g;  | 
100  | 0  |     c0[i] = r;  | 
101  | 0  |     c1[i] = g;  | 
102  | 0  |     c2[i] = b;  | 
103  | 0  |   }  | 
104  | 0  | }  | 
105  |  |  | 
106  |  | /* <summary> */  | 
107  |  | /* Get norm of basis function of reversible MCT. */  | 
108  |  | /* </summary> */  | 
109  | 0  | OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { | 
110  | 0  |   return opj_mct_norms[compno];  | 
111  | 0  | }  | 
112  |  |  | 
113  |  | /* <summary> */  | 
114  |  | /* Foward irreversible MCT. */  | 
115  |  | /* </summary> */  | 
116  |  | void opj_mct_encode_real(  | 
117  |  |     OPJ_INT32* restrict c0,  | 
118  |  |     OPJ_INT32* restrict c1,  | 
119  |  |     OPJ_INT32* restrict c2,  | 
120  |  |     OPJ_UINT32 n)  | 
121  | 0  | { | 
122  | 0  |   OPJ_UINT32 i;  | 
123  | 0  |   for(i = 0; i < n; ++i) { | 
124  | 0  |     OPJ_INT32 r = c0[i];  | 
125  | 0  |     OPJ_INT32 g = c1[i];  | 
126  | 0  |     OPJ_INT32 b = c2[i];  | 
127  | 0  |     OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);  | 
128  | 0  |     OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);  | 
129  | 0  |     OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);  | 
130  | 0  |     c0[i] = y;  | 
131  | 0  |     c1[i] = u;  | 
132  | 0  |     c2[i] = v;  | 
133  | 0  |   }  | 
134  | 0  | }  | 
135  |  |  | 
136  |  | /* <summary> */  | 
137  |  | /* Inverse irreversible MCT. */  | 
138  |  | /* </summary> */  | 
139  |  | void opj_mct_decode_real(  | 
140  |  |     OPJ_FLOAT32* restrict c0,  | 
141  |  |     OPJ_FLOAT32* restrict c1,  | 
142  |  |     OPJ_FLOAT32* restrict c2,  | 
143  |  |     OPJ_UINT32 n)  | 
144  | 0  | { | 
145  | 0  |   OPJ_UINT32 i;  | 
146  | 0  | #ifdef __SSE__  | 
147  | 0  |   __m128 vrv, vgu, vgv, vbu;  | 
148  | 0  |   vrv = _mm_set1_ps(1.402f);  | 
149  | 0  |   vgu = _mm_set1_ps(0.34413f);  | 
150  | 0  |   vgv = _mm_set1_ps(0.71414f);  | 
151  | 0  |   vbu = _mm_set1_ps(1.772f);  | 
152  | 0  |   for (i = 0; i < (n >> 3); ++i) { | 
153  | 0  |     __m128 vy, vu, vv;  | 
154  | 0  |     __m128 vr, vg, vb;  | 
155  |  | 
  | 
156  | 0  |     vy = _mm_load_ps(c0);  | 
157  | 0  |     vu = _mm_load_ps(c1);  | 
158  | 0  |     vv = _mm_load_ps(c2);  | 
159  | 0  |     vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));  | 
160  | 0  |     vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));  | 
161  | 0  |     vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));  | 
162  | 0  |     _mm_store_ps(c0, vr);  | 
163  | 0  |     _mm_store_ps(c1, vg);  | 
164  | 0  |     _mm_store_ps(c2, vb);  | 
165  | 0  |     c0 += 4;  | 
166  | 0  |     c1 += 4;  | 
167  | 0  |     c2 += 4;  | 
168  |  | 
  | 
169  | 0  |     vy = _mm_load_ps(c0);  | 
170  | 0  |     vu = _mm_load_ps(c1);  | 
171  | 0  |     vv = _mm_load_ps(c2);  | 
172  | 0  |     vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));  | 
173  | 0  |     vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));  | 
174  | 0  |     vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));  | 
175  | 0  |     _mm_store_ps(c0, vr);  | 
176  | 0  |     _mm_store_ps(c1, vg);  | 
177  | 0  |     _mm_store_ps(c2, vb);  | 
178  | 0  |     c0 += 4;  | 
179  | 0  |     c1 += 4;  | 
180  | 0  |     c2 += 4;  | 
181  | 0  |   }  | 
182  | 0  |   n &= 7;  | 
183  | 0  | #endif  | 
184  | 0  |   for(i = 0; i < n; ++i) { | 
185  | 0  |     OPJ_FLOAT32 y = c0[i];  | 
186  | 0  |     OPJ_FLOAT32 u = c1[i];  | 
187  | 0  |     OPJ_FLOAT32 v = c2[i];  | 
188  | 0  |     OPJ_FLOAT32 r = y + (v * 1.402f);  | 
189  | 0  |     OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));  | 
190  | 0  |     OPJ_FLOAT32 b = y + (u * 1.772f);  | 
191  | 0  |     c0[i] = r;  | 
192  | 0  |     c1[i] = g;  | 
193  | 0  |     c2[i] = b;  | 
194  | 0  |   }  | 
195  | 0  | }  | 
196  |  |  | 
197  |  | /* <summary> */  | 
198  |  | /* Get norm of basis function of irreversible MCT. */  | 
199  |  | /* </summary> */  | 
200  | 0  | OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) { | 
201  | 0  |   return opj_mct_norms_real[compno];  | 
202  | 0  | }  | 
203  |  |  | 
204  |  |  | 
205  |  | OPJ_BOOL opj_mct_encode_custom(  | 
206  |  |              OPJ_BYTE * pCodingdata,  | 
207  |  |              OPJ_UINT32 n,  | 
208  |  |              OPJ_BYTE ** pData,  | 
209  |  |              OPJ_UINT32 pNbComp,  | 
210  |  |              OPJ_UINT32 isSigned)  | 
211  | 0  | { | 
212  | 0  |   OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;  | 
213  | 0  |   OPJ_UINT32 i;  | 
214  | 0  |   OPJ_UINT32 j;  | 
215  | 0  |   OPJ_UINT32 k;  | 
216  | 0  |   OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;  | 
217  | 0  |   OPJ_INT32 * lCurrentData = 00;  | 
218  | 0  |   OPJ_INT32 * lCurrentMatrix = 00;  | 
219  | 0  |   OPJ_INT32 ** lData = (OPJ_INT32 **) pData;  | 
220  | 0  |   OPJ_UINT32 lMultiplicator = 1 << 13;  | 
221  | 0  |   OPJ_INT32 * lMctPtr;  | 
222  |  | 
  | 
223  | 0  |     OPJ_ARG_NOT_USED(isSigned);  | 
224  |  | 
  | 
225  | 0  |   lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));  | 
226  | 0  |   if (! lCurrentData) { | 
227  | 0  |     return OPJ_FALSE;  | 
228  | 0  |   }  | 
229  |  |  | 
230  | 0  |   lCurrentMatrix = lCurrentData + pNbComp;  | 
231  |  | 
  | 
232  | 0  |   for (i =0;i<lNbMatCoeff;++i) { | 
233  | 0  |     lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * (OPJ_FLOAT32)lMultiplicator);  | 
234  | 0  |   }  | 
235  |  | 
  | 
236  | 0  |   for (i = 0; i < n; ++i)  { | 
237  | 0  |     lMctPtr = lCurrentMatrix;  | 
238  | 0  |     for (j=0;j<pNbComp;++j) { | 
239  | 0  |       lCurrentData[j] = (*(lData[j]));  | 
240  | 0  |     }  | 
241  |  | 
  | 
242  | 0  |     for (j=0;j<pNbComp;++j) { | 
243  | 0  |       *(lData[j]) = 0;  | 
244  | 0  |       for (k=0;k<pNbComp;++k) { | 
245  | 0  |         *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);  | 
246  | 0  |         ++lMctPtr;  | 
247  | 0  |       }  | 
248  |  | 
  | 
249  | 0  |       ++lData[j];  | 
250  | 0  |     }  | 
251  | 0  |   }  | 
252  |  | 
  | 
253  | 0  |   opj_free(lCurrentData);  | 
254  |  | 
  | 
255  | 0  |   return OPJ_TRUE;  | 
256  | 0  | }  | 
257  |  |  | 
258  |  | OPJ_BOOL opj_mct_decode_custom(  | 
259  |  |              OPJ_BYTE * pDecodingData,  | 
260  |  |              OPJ_UINT32 n,  | 
261  |  |              OPJ_BYTE ** pData,  | 
262  |  |              OPJ_UINT32 pNbComp,  | 
263  |  |              OPJ_UINT32 isSigned)  | 
264  | 0  | { | 
265  | 0  |   OPJ_FLOAT32 * lMct;  | 
266  | 0  |   OPJ_UINT32 i;  | 
267  | 0  |   OPJ_UINT32 j;  | 
268  | 0  |   OPJ_UINT32 k;  | 
269  |  | 
  | 
270  | 0  |   OPJ_FLOAT32 * lCurrentData = 00;  | 
271  | 0  |   OPJ_FLOAT32 * lCurrentResult = 00;  | 
272  | 0  |   OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;  | 
273  |  | 
  | 
274  | 0  |     OPJ_ARG_NOT_USED(isSigned);  | 
275  |  | 
  | 
276  | 0  |   lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));  | 
277  | 0  |   if (! lCurrentData) { | 
278  | 0  |     return OPJ_FALSE;  | 
279  | 0  |   }  | 
280  | 0  |   lCurrentResult = lCurrentData + pNbComp;  | 
281  |  | 
  | 
282  | 0  |   for (i = 0; i < n; ++i) { | 
283  | 0  |     lMct = (OPJ_FLOAT32 *) pDecodingData;  | 
284  | 0  |     for (j=0;j<pNbComp;++j) { | 
285  | 0  |       lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));  | 
286  | 0  |     }  | 
287  | 0  |     for (j=0;j<pNbComp;++j) { | 
288  | 0  |       lCurrentResult[j] = 0;  | 
289  | 0  |       for (k=0;k<pNbComp;++k) { | 
290  | 0  |         lCurrentResult[j] += *(lMct++) * lCurrentData[k];  | 
291  | 0  |       }  | 
292  | 0  |       *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);  | 
293  | 0  |     }  | 
294  | 0  |   }  | 
295  | 0  |   opj_free(lCurrentData);  | 
296  | 0  |   return OPJ_TRUE;  | 
297  | 0  | }  | 
298  |  |  | 
299  |  | void opj_calculate_norms( OPJ_FLOAT64 * pNorms,  | 
300  |  |               OPJ_UINT32 pNbComps,  | 
301  |  |               OPJ_FLOAT32 * pMatrix)  | 
302  | 0  | { | 
303  | 0  |   OPJ_UINT32 i,j,lIndex;  | 
304  | 0  |   OPJ_FLOAT32 lCurrentValue;  | 
305  | 0  |   OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;  | 
306  | 0  |   OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;  | 
307  |  | 
  | 
308  | 0  |   for (i=0;i<pNbComps;++i) { | 
309  | 0  |     lNorms[i] = 0;  | 
310  | 0  |     lIndex = i;  | 
311  |  | 
  | 
312  | 0  |     for (j=0;j<pNbComps;++j) { | 
313  | 0  |       lCurrentValue = lMatrix[lIndex];  | 
314  | 0  |       lIndex += pNbComps;  | 
315  | 0  |       lNorms[i] += lCurrentValue * lCurrentValue;  | 
316  | 0  |     }  | 
317  | 0  |     lNorms[i] = sqrt(lNorms[i]);  | 
318  | 0  |   }  | 
319  | 0  | }  |