/src/libjpeg-turbo.main/jfdctint.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |  * jfdctint.c | 
| 3 |  |  * | 
| 4 |  |  * This file was part of the Independent JPEG Group's software: | 
| 5 |  |  * Copyright (C) 1991-1996, Thomas G. Lane. | 
| 6 |  |  * libjpeg-turbo Modifications: | 
| 7 |  |  * Copyright (C) 2015, 2020, 2022, D. R. Commander. | 
| 8 |  |  * For conditions of distribution and use, see the accompanying README.ijg | 
| 9 |  |  * file. | 
| 10 |  |  * | 
| 11 |  |  * This file contains a slower but more accurate integer implementation of the | 
| 12 |  |  * forward DCT (Discrete Cosine Transform). | 
| 13 |  |  * | 
| 14 |  |  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT | 
| 15 |  |  * on each column.  Direct algorithms are also available, but they are | 
| 16 |  |  * much more complex and seem not to be any faster when reduced to code. | 
| 17 |  |  * | 
| 18 |  |  * This implementation is based on an algorithm described in | 
| 19 |  |  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | 
| 20 |  |  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | 
| 21 |  |  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | 
| 22 |  |  * The primary algorithm described there uses 11 multiplies and 29 adds. | 
| 23 |  |  * We use their alternate method with 12 multiplies and 32 adds. | 
| 24 |  |  * The advantage of this method is that no data path contains more than one | 
| 25 |  |  * multiplication; this allows a very simple and accurate implementation in | 
| 26 |  |  * scaled fixed-point arithmetic, with a minimal number of shifts. | 
| 27 |  |  */ | 
| 28 |  |  | 
| 29 |  | #define JPEG_INTERNALS | 
| 30 |  | #include "jinclude.h" | 
| 31 |  | #include "jpeglib.h" | 
| 32 |  | #include "jdct.h"               /* Private declarations for DCT subsystem */ | 
| 33 |  |  | 
| 34 |  | #ifdef DCT_ISLOW_SUPPORTED | 
| 35 |  |  | 
| 36 |  |  | 
| 37 |  | /* | 
| 38 |  |  * This module is specialized to the case DCTSIZE = 8. | 
| 39 |  |  */ | 
| 40 |  |  | 
| 41 |  | #if DCTSIZE != 8 | 
| 42 |  |   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | 
| 43 |  | #endif | 
| 44 |  |  | 
| 45 |  |  | 
| 46 |  | /* | 
| 47 |  |  * The poop on this scaling stuff is as follows: | 
| 48 |  |  * | 
| 49 |  |  * Each 1-D DCT step produces outputs which are a factor of sqrt(N) | 
| 50 |  |  * larger than the true DCT outputs.  The final outputs are therefore | 
| 51 |  |  * a factor of N larger than desired; since N=8 this can be cured by | 
| 52 |  |  * a simple right shift at the end of the algorithm.  The advantage of | 
| 53 |  |  * this arrangement is that we save two multiplications per 1-D DCT, | 
| 54 |  |  * because the y0 and y4 outputs need not be divided by sqrt(N). | 
| 55 |  |  * In the IJG code, this factor of 8 is removed by the quantization step | 
| 56 |  |  * (in jcdctmgr.c), NOT in this module. | 
| 57 |  |  * | 
| 58 |  |  * We have to do addition and subtraction of the integer inputs, which | 
| 59 |  |  * is no problem, and multiplication by fractional constants, which is | 
| 60 |  |  * a problem to do in integer arithmetic.  We multiply all the constants | 
| 61 |  |  * by CONST_SCALE and convert them to integer constants (thus retaining | 
| 62 |  |  * CONST_BITS bits of precision in the constants).  After doing a | 
| 63 |  |  * multiplication we have to divide the product by CONST_SCALE, with proper | 
| 64 |  |  * rounding, to produce the correct output.  This division can be done | 
| 65 |  |  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting | 
| 66 |  |  * as long as possible so that partial sums can be added together with | 
| 67 |  |  * full fractional precision. | 
| 68 |  |  * | 
| 69 |  |  * The outputs of the first pass are scaled up by PASS1_BITS bits so that | 
| 70 |  |  * they are represented to better-than-integral precision.  These outputs | 
| 71 |  |  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | 
| 72 |  |  * with the recommended scaling.  (For 12-bit sample data, the intermediate | 
| 73 |  |  * array is JLONG anyway.) | 
| 74 |  |  * | 
| 75 |  |  * To avoid overflow of the 32-bit intermediate results in pass 2, we must | 
| 76 |  |  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis | 
| 77 |  |  * shows that the values given below are the most effective. | 
| 78 |  |  */ | 
| 79 |  |  | 
| 80 |  | #if BITS_IN_JSAMPLE == 8 | 
| 81 |  | #define CONST_BITS  13 | 
| 82 |  | #define PASS1_BITS  2 | 
| 83 |  | #else | 
| 84 |  | #define CONST_BITS  13 | 
| 85 |  | #define PASS1_BITS  1           /* lose a little precision to avoid overflow */ | 
| 86 |  | #endif | 
| 87 |  |  | 
| 88 |  | /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus | 
| 89 |  |  * causing a lot of useless floating-point operations at run time. | 
| 90 |  |  * To get around this we use the following pre-calculated constants. | 
| 91 |  |  * If you change CONST_BITS you may want to add appropriate values. | 
| 92 |  |  * (With a reasonable C compiler, you can just rely on the FIX() macro...) | 
| 93 |  |  */ | 
| 94 |  |  | 
| 95 |  | #if CONST_BITS == 13 | 
| 96 |  | #define FIX_0_298631336  ((JLONG)2446)          /* FIX(0.298631336) */ | 
| 97 |  | #define FIX_0_390180644  ((JLONG)3196)          /* FIX(0.390180644) */ | 
| 98 |  | #define FIX_0_541196100  ((JLONG)4433)          /* FIX(0.541196100) */ | 
| 99 |  | #define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */ | 
| 100 |  | #define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */ | 
| 101 |  | #define FIX_1_175875602  ((JLONG)9633)          /* FIX(1.175875602) */ | 
| 102 |  | #define FIX_1_501321110  ((JLONG)12299)         /* FIX(1.501321110) */ | 
| 103 |  | #define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */ | 
| 104 |  | #define FIX_1_961570560  ((JLONG)16069)         /* FIX(1.961570560) */ | 
| 105 |  | #define FIX_2_053119869  ((JLONG)16819)         /* FIX(2.053119869) */ | 
| 106 |  | #define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */ | 
| 107 |  | #define FIX_3_072711026  ((JLONG)25172)         /* FIX(3.072711026) */ | 
| 108 |  | #else | 
| 109 |  | #define FIX_0_298631336  FIX(0.298631336) | 
| 110 |  | #define FIX_0_390180644  FIX(0.390180644) | 
| 111 |  | #define FIX_0_541196100  FIX(0.541196100) | 
| 112 |  | #define FIX_0_765366865  FIX(0.765366865) | 
| 113 |  | #define FIX_0_899976223  FIX(0.899976223) | 
| 114 |  | #define FIX_1_175875602  FIX(1.175875602) | 
| 115 |  | #define FIX_1_501321110  FIX(1.501321110) | 
| 116 |  | #define FIX_1_847759065  FIX(1.847759065) | 
| 117 |  | #define FIX_1_961570560  FIX(1.961570560) | 
| 118 |  | #define FIX_2_053119869  FIX(2.053119869) | 
| 119 |  | #define FIX_2_562915447  FIX(2.562915447) | 
| 120 |  | #define FIX_3_072711026  FIX(3.072711026) | 
| 121 |  | #endif | 
| 122 |  |  | 
| 123 |  |  | 
| 124 |  | /* Multiply an JLONG variable by an JLONG constant to yield an JLONG result. | 
| 125 |  |  * For 8-bit samples with the recommended scaling, all the variable | 
| 126 |  |  * and constant values involved are no more than 16 bits wide, so a | 
| 127 |  |  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. | 
| 128 |  |  * For 12-bit samples, a full 32-bit multiplication will be needed. | 
| 129 |  |  */ | 
| 130 |  |  | 
| 131 |  | #if BITS_IN_JSAMPLE == 8 | 
| 132 | 0 | #define MULTIPLY(var, const)  MULTIPLY16C16(var, const) | 
| 133 |  | #else | 
| 134 | 2.37G | #define MULTIPLY(var, const)  ((var) * (const)) | 
| 135 |  | #endif | 
| 136 |  |  | 
| 137 |  |  | 
| 138 |  | /* | 
| 139 |  |  * Perform the forward DCT on one block of samples. | 
| 140 |  |  */ | 
| 141 |  |  | 
| 142 |  | GLOBAL(void) | 
| 143 |  | _jpeg_fdct_islow(DCTELEM *data) | 
| 144 | 14.8M | { | 
| 145 | 14.8M |   JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | 
| 146 | 14.8M |   JLONG tmp10, tmp11, tmp12, tmp13; | 
| 147 | 14.8M |   JLONG z1, z2, z3, z4, z5; | 
| 148 | 14.8M |   DCTELEM *dataptr; | 
| 149 | 14.8M |   int ctr; | 
| 150 | 14.8M |   SHIFT_TEMPS | 
| 151 |  |  | 
| 152 |  |   /* Pass 1: process rows. */ | 
| 153 |  |   /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | 
| 154 |  |   /* furthermore, we scale the results by 2**PASS1_BITS. */ | 
| 155 |  |  | 
| 156 | 14.8M |   dataptr = data; | 
| 157 | 133M |   for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { | 
| 158 | 118M |     tmp0 = dataptr[0] + dataptr[7]; | 
| 159 | 118M |     tmp7 = dataptr[0] - dataptr[7]; | 
| 160 | 118M |     tmp1 = dataptr[1] + dataptr[6]; | 
| 161 | 118M |     tmp6 = dataptr[1] - dataptr[6]; | 
| 162 | 118M |     tmp2 = dataptr[2] + dataptr[5]; | 
| 163 | 118M |     tmp5 = dataptr[2] - dataptr[5]; | 
| 164 | 118M |     tmp3 = dataptr[3] + dataptr[4]; | 
| 165 | 118M |     tmp4 = dataptr[3] - dataptr[4]; | 
| 166 |  |  | 
| 167 |  |     /* Even part per LL&M figure 1 --- note that published figure is faulty; | 
| 168 |  |      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | 
| 169 |  |      */ | 
| 170 |  |  | 
| 171 | 118M |     tmp10 = tmp0 + tmp3; | 
| 172 | 118M |     tmp13 = tmp0 - tmp3; | 
| 173 | 118M |     tmp11 = tmp1 + tmp2; | 
| 174 | 118M |     tmp12 = tmp1 - tmp2; | 
| 175 |  |  | 
| 176 | 118M |     dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); | 
| 177 | 118M |     dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); | 
| 178 |  |  | 
| 179 | 118M |     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | 
| 180 | 118M |     dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | 
| 181 | 118M |                                   CONST_BITS - PASS1_BITS); | 
| 182 | 118M |     dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), | 
| 183 | 118M |                                   CONST_BITS - PASS1_BITS); | 
| 184 |  |  | 
| 185 |  |     /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | 
| 186 |  |      * cK represents cos(K*pi/16). | 
| 187 |  |      * i0..i3 in the paper are tmp4..tmp7 here. | 
| 188 |  |      */ | 
| 189 |  |  | 
| 190 | 118M |     z1 = tmp4 + tmp7; | 
| 191 | 118M |     z2 = tmp5 + tmp6; | 
| 192 | 118M |     z3 = tmp4 + tmp6; | 
| 193 | 118M |     z4 = tmp5 + tmp7; | 
| 194 | 118M |     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ | 
| 195 |  |  | 
| 196 | 118M |     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ | 
| 197 | 118M |     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ | 
| 198 | 118M |     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ | 
| 199 | 118M |     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ | 
| 200 | 118M |     z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ | 
| 201 | 118M |     z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ | 
| 202 | 118M |     z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ | 
| 203 | 118M |     z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ | 
| 204 |  |  | 
| 205 | 118M |     z3 += z5; | 
| 206 | 118M |     z4 += z5; | 
| 207 |  |  | 
| 208 | 118M |     dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS); | 
| 209 | 118M |     dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS); | 
| 210 | 118M |     dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS); | 
| 211 | 118M |     dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS); | 
| 212 |  |  | 
| 213 | 118M |     dataptr += DCTSIZE;         /* advance pointer to next row */ | 
| 214 | 118M |   } | 
| 215 |  |  | 
| 216 |  |   /* Pass 2: process columns. | 
| 217 |  |    * We remove the PASS1_BITS scaling, but leave the results scaled up | 
| 218 |  |    * by an overall factor of 8. | 
| 219 |  |    */ | 
| 220 |  |  | 
| 221 | 14.8M |   dataptr = data; | 
| 222 | 133M |   for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { | 
| 223 | 118M |     tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7]; | 
| 224 | 118M |     tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7]; | 
| 225 | 118M |     tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6]; | 
| 226 | 118M |     tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6]; | 
| 227 | 118M |     tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5]; | 
| 228 | 118M |     tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5]; | 
| 229 | 118M |     tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4]; | 
| 230 | 118M |     tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4]; | 
| 231 |  |  | 
| 232 |  |     /* Even part per LL&M figure 1 --- note that published figure is faulty; | 
| 233 |  |      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | 
| 234 |  |      */ | 
| 235 |  |  | 
| 236 | 118M |     tmp10 = tmp0 + tmp3; | 
| 237 | 118M |     tmp13 = tmp0 - tmp3; | 
| 238 | 118M |     tmp11 = tmp1 + tmp2; | 
| 239 | 118M |     tmp12 = tmp1 - tmp2; | 
| 240 |  |  | 
| 241 | 118M |     dataptr[DCTSIZE * 0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS); | 
| 242 | 118M |     dataptr[DCTSIZE * 4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS); | 
| 243 |  |  | 
| 244 | 118M |     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | 
| 245 | 118M |     dataptr[DCTSIZE * 2] = | 
| 246 | 118M |       (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | 
| 247 | 118M |                        CONST_BITS + PASS1_BITS); | 
| 248 | 118M |     dataptr[DCTSIZE * 6] = | 
| 249 | 118M |       (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), | 
| 250 | 118M |                        CONST_BITS + PASS1_BITS); | 
| 251 |  |  | 
| 252 |  |     /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | 
| 253 |  |      * cK represents cos(K*pi/16). | 
| 254 |  |      * i0..i3 in the paper are tmp4..tmp7 here. | 
| 255 |  |      */ | 
| 256 |  |  | 
| 257 | 118M |     z1 = tmp4 + tmp7; | 
| 258 | 118M |     z2 = tmp5 + tmp6; | 
| 259 | 118M |     z3 = tmp4 + tmp6; | 
| 260 | 118M |     z4 = tmp5 + tmp7; | 
| 261 | 118M |     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ | 
| 262 |  |  | 
| 263 | 118M |     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ | 
| 264 | 118M |     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ | 
| 265 | 118M |     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ | 
| 266 | 118M |     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ | 
| 267 | 118M |     z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ | 
| 268 | 118M |     z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ | 
| 269 | 118M |     z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ | 
| 270 | 118M |     z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ | 
| 271 |  |  | 
| 272 | 118M |     z3 += z5; | 
| 273 | 118M |     z4 += z5; | 
| 274 |  |  | 
| 275 | 118M |     dataptr[DCTSIZE * 7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, | 
| 276 | 118M |                                             CONST_BITS + PASS1_BITS); | 
| 277 | 118M |     dataptr[DCTSIZE * 5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, | 
| 278 | 118M |                                             CONST_BITS + PASS1_BITS); | 
| 279 | 118M |     dataptr[DCTSIZE * 3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, | 
| 280 | 118M |                                             CONST_BITS + PASS1_BITS); | 
| 281 | 118M |     dataptr[DCTSIZE * 1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, | 
| 282 | 118M |                                             CONST_BITS + PASS1_BITS); | 
| 283 |  |  | 
| 284 | 118M |     dataptr++;                  /* advance pointer to next column */ | 
| 285 | 118M |   } | 
| 286 | 14.8M | } | Line | Count | Source |  | 144 | 14.8M | { |  | 145 | 14.8M |   JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |  | 146 | 14.8M |   JLONG tmp10, tmp11, tmp12, tmp13; |  | 147 | 14.8M |   JLONG z1, z2, z3, z4, z5; |  | 148 | 14.8M |   DCTELEM *dataptr; |  | 149 | 14.8M |   int ctr; |  | 150 | 14.8M |   SHIFT_TEMPS |  | 151 |  |  |  | 152 |  |   /* Pass 1: process rows. */ |  | 153 |  |   /* Note results are scaled up by sqrt(8) compared to a true DCT; */ |  | 154 |  |   /* furthermore, we scale the results by 2**PASS1_BITS. */ |  | 155 |  |  |  | 156 | 14.8M |   dataptr = data; |  | 157 | 133M |   for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { |  | 158 | 118M |     tmp0 = dataptr[0] + dataptr[7]; |  | 159 | 118M |     tmp7 = dataptr[0] - dataptr[7]; |  | 160 | 118M |     tmp1 = dataptr[1] + dataptr[6]; |  | 161 | 118M |     tmp6 = dataptr[1] - dataptr[6]; |  | 162 | 118M |     tmp2 = dataptr[2] + dataptr[5]; |  | 163 | 118M |     tmp5 = dataptr[2] - dataptr[5]; |  | 164 | 118M |     tmp3 = dataptr[3] + dataptr[4]; |  | 165 | 118M |     tmp4 = dataptr[3] - dataptr[4]; |  | 166 |  |  |  | 167 |  |     /* Even part per LL&M figure 1 --- note that published figure is faulty; |  | 168 |  |      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |  | 169 |  |      */ |  | 170 |  |  |  | 171 | 118M |     tmp10 = tmp0 + tmp3; |  | 172 | 118M |     tmp13 = tmp0 - tmp3; |  | 173 | 118M |     tmp11 = tmp1 + tmp2; |  | 174 | 118M |     tmp12 = tmp1 - tmp2; |  | 175 |  |  |  | 176 | 118M |     dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); |  | 177 | 118M |     dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); |  | 178 |  |  |  | 179 | 118M |     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |  | 180 | 118M |     dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), |  | 181 | 118M |                                   CONST_BITS - PASS1_BITS); |  | 182 | 118M |     dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), |  | 183 | 118M |                                   CONST_BITS - PASS1_BITS); |  | 184 |  |  |  | 185 |  |     /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |  | 186 |  |      * cK represents cos(K*pi/16). |  | 187 |  |      * i0..i3 in the paper are tmp4..tmp7 here. |  | 188 |  |      */ |  | 189 |  |  |  | 190 | 118M |     z1 = tmp4 + tmp7; |  | 191 | 118M |     z2 = tmp5 + tmp6; |  | 192 | 118M |     z3 = tmp4 + tmp6; |  | 193 | 118M |     z4 = tmp5 + tmp7; |  | 194 | 118M |     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |  | 195 |  |  |  | 196 | 118M |     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |  | 197 | 118M |     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |  | 198 | 118M |     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |  | 199 | 118M |     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |  | 200 | 118M |     z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ |  | 201 | 118M |     z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |  | 202 | 118M |     z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |  | 203 | 118M |     z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ |  | 204 |  |  |  | 205 | 118M |     z3 += z5; |  | 206 | 118M |     z4 += z5; |  | 207 |  |  |  | 208 | 118M |     dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS); |  | 209 | 118M |     dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS); |  | 210 | 118M |     dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS); |  | 211 | 118M |     dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS); |  | 212 |  |  |  | 213 | 118M |     dataptr += DCTSIZE;         /* advance pointer to next row */ |  | 214 | 118M |   } |  | 215 |  |  |  | 216 |  |   /* Pass 2: process columns. |  | 217 |  |    * We remove the PASS1_BITS scaling, but leave the results scaled up |  | 218 |  |    * by an overall factor of 8. |  | 219 |  |    */ |  | 220 |  |  |  | 221 | 14.8M |   dataptr = data; |  | 222 | 133M |   for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { |  | 223 | 118M |     tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7]; |  | 224 | 118M |     tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7]; |  | 225 | 118M |     tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6]; |  | 226 | 118M |     tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6]; |  | 227 | 118M |     tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5]; |  | 228 | 118M |     tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5]; |  | 229 | 118M |     tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4]; |  | 230 | 118M |     tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4]; |  | 231 |  |  |  | 232 |  |     /* Even part per LL&M figure 1 --- note that published figure is faulty; |  | 233 |  |      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |  | 234 |  |      */ |  | 235 |  |  |  | 236 | 118M |     tmp10 = tmp0 + tmp3; |  | 237 | 118M |     tmp13 = tmp0 - tmp3; |  | 238 | 118M |     tmp11 = tmp1 + tmp2; |  | 239 | 118M |     tmp12 = tmp1 - tmp2; |  | 240 |  |  |  | 241 | 118M |     dataptr[DCTSIZE * 0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS); |  | 242 | 118M |     dataptr[DCTSIZE * 4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS); |  | 243 |  |  |  | 244 | 118M |     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |  | 245 | 118M |     dataptr[DCTSIZE * 2] = |  | 246 | 118M |       (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), |  | 247 | 118M |                        CONST_BITS + PASS1_BITS); |  | 248 | 118M |     dataptr[DCTSIZE * 6] = |  | 249 | 118M |       (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), |  | 250 | 118M |                        CONST_BITS + PASS1_BITS); |  | 251 |  |  |  | 252 |  |     /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |  | 253 |  |      * cK represents cos(K*pi/16). |  | 254 |  |      * i0..i3 in the paper are tmp4..tmp7 here. |  | 255 |  |      */ |  | 256 |  |  |  | 257 | 118M |     z1 = tmp4 + tmp7; |  | 258 | 118M |     z2 = tmp5 + tmp6; |  | 259 | 118M |     z3 = tmp4 + tmp6; |  | 260 | 118M |     z4 = tmp5 + tmp7; |  | 261 | 118M |     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |  | 262 |  |  |  | 263 | 118M |     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |  | 264 | 118M |     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |  | 265 | 118M |     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |  | 266 | 118M |     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |  | 267 | 118M |     z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ |  | 268 | 118M |     z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |  | 269 | 118M |     z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |  | 270 | 118M |     z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ |  | 271 |  |  |  | 272 | 118M |     z3 += z5; |  | 273 | 118M |     z4 += z5; |  | 274 |  |  |  | 275 | 118M |     dataptr[DCTSIZE * 7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, |  | 276 | 118M |                                             CONST_BITS + PASS1_BITS); |  | 277 | 118M |     dataptr[DCTSIZE * 5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, |  | 278 | 118M |                                             CONST_BITS + PASS1_BITS); |  | 279 | 118M |     dataptr[DCTSIZE * 3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, |  | 280 | 118M |                                             CONST_BITS + PASS1_BITS); |  | 281 | 118M |     dataptr[DCTSIZE * 1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, |  | 282 | 118M |                                             CONST_BITS + PASS1_BITS); |  | 283 |  |  |  | 284 | 118M |     dataptr++;                  /* advance pointer to next column */ |  | 285 | 118M |   } |  | 286 | 14.8M | } | 
Unexecuted instantiation: jpeg_fdct_islow | 
| 287 |  |  | 
| 288 |  | #endif /* DCT_ISLOW_SUPPORTED */ |