/src/guetzli/guetzli/idct.cc

Source
/*
 * Copyright 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Integer implementation of the Inverse Discrete Cosine Transform (IDCT).

#include "guetzli/idct.h"

#include <algorithm>
#include <math.h>

namespace guetzli {

// kIDCTMatrix[8*x+u] = alpha(u)*cos((2*x+1)*u*M_PI/16)*sqrt(2), with fixed 13
// bit precision, where alpha(0) = 1/sqrt(2) and alpha(u) = 1 for u > 0.
// Some coefficients are off by +-1 to mimick libjpeg's behaviour.
static const int kIDCTMatrix[kDCTBlockSize] = {
  8192,  11363,  10703,   9633,   8192,   6437,   4433,   2260,
  8192,   9633,   4433,  -2259,  -8192, -11362, -10704,  -6436,
  8192,   6437,  -4433, -11362,  -8192,   2261,  10704,   9633,
  8192,   2260, -10703,  -6436,   8192,   9633,  -4433, -11363,
  8192,  -2260, -10703,   6436,   8192,  -9633,  -4433,  11363,
  8192,  -6437,  -4433,  11362,  -8192,  -2261,  10704,  -9633,
  8192,  -9633,   4433,   2259,  -8192,  11362, -10704,   6436,
  8192, -11363,  10703,  -9633,   8192,  -6437,   4433,  -2260,
};

// Computes out[x] = sum{kIDCTMatrix[8*x+u]*in[u*stride]; for u in [0..7]}
inline void Compute1dIDCT(const coeff_t* in, const int stride, int out[8]) {
  int tmp0, tmp1, tmp2, tmp3, tmp4;

  tmp1 = kIDCTMatrix[0] * in[0];
  out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = tmp1;

  tmp0 = in[stride];
  tmp1 = kIDCTMatrix[ 1] * tmp0;
  tmp2 = kIDCTMatrix[ 9] * tmp0;
  tmp3 = kIDCTMatrix[17] * tmp0;
  tmp4 = kIDCTMatrix[25] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] += tmp3;
  out[3] += tmp4;
  out[4] -= tmp4;
  out[5] -= tmp3;
  out[6] -= tmp2;
  out[7] -= tmp1;

  tmp0 = in[2 * stride];
  tmp1 = kIDCTMatrix[ 2] * tmp0;
  tmp2 = kIDCTMatrix[10] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] -= tmp2;
  out[3] -= tmp1;
  out[4] -= tmp1;
  out[5] -= tmp2;
  out[6] += tmp2;
  out[7] += tmp1;

  tmp0 = in[3 * stride];
  tmp1 = kIDCTMatrix[ 3] * tmp0;
  tmp2 = kIDCTMatrix[11] * tmp0;
  tmp3 = kIDCTMatrix[19] * tmp0;
  tmp4 = kIDCTMatrix[27] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] += tmp3;
  out[3] += tmp4;
  out[4] -= tmp4;
  out[5] -= tmp3;
  out[6] -= tmp2;
  out[7] -= tmp1;

  tmp0 = in[4 * stride];
  tmp1 = kIDCTMatrix[ 4] * tmp0;
  out[0] += tmp1;
  out[1] -= tmp1;
  out[2] -= tmp1;
  out[3] += tmp1;
  out[4] += tmp1;
  out[5] -= tmp1;
  out[6] -= tmp1;
  out[7] += tmp1;

  tmp0 = in[5 * stride];
  tmp1 = kIDCTMatrix[ 5] * tmp0;
  tmp2 = kIDCTMatrix[13] * tmp0;
  tmp3 = kIDCTMatrix[21] * tmp0;
  tmp4 = kIDCTMatrix[29] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] += tmp3;
  out[3] += tmp4;
  out[4] -= tmp4;
  out[5] -= tmp3;
  out[6] -= tmp2;
  out[7] -= tmp1;

  tmp0 = in[6 * stride];
  tmp1 = kIDCTMatrix[ 6] * tmp0;
  tmp2 = kIDCTMatrix[14] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] -= tmp2;
  out[3] -= tmp1;
  out[4] -= tmp1;
  out[5] -= tmp2;
  out[6] += tmp2;
  out[7] += tmp1;

  tmp0 = in[7 * stride];
  tmp1 = kIDCTMatrix[ 7] * tmp0;
  tmp2 = kIDCTMatrix[15] * tmp0;
  tmp3 = kIDCTMatrix[23] * tmp0;
  tmp4 = kIDCTMatrix[31] * tmp0;
  out[0] += tmp1;
  out[1] += tmp2;
  out[2] += tmp3;
  out[3] += tmp4;
  out[4] -= tmp4;
  out[5] -= tmp3;
  out[6] -= tmp2;
  out[7] -= tmp1;
}

void ComputeBlockIDCT(const coeff_t* block, uint8_t* out) {
  coeff_t colidcts[kDCTBlockSize];
  const int kColScale = 11;
  const int kColRound = 1 << (kColScale - 1);
  for (int x = 0; x < 8; ++x) {
    int colbuf[8] = { 0 };
    Compute1dIDCT(&block[x], 8, colbuf);
    for (int y = 0; y < 8; ++y) {
      colidcts[8 * y + x] = (colbuf[y] + kColRound) >> kColScale;
    }
  }
  const int kRowScale = 18;
  const int kRowRound = 257 << (kRowScale - 1);  // includes offset by 128
  for (int y = 0; y < 8; ++y) {
    const int rowidx = 8 * y;
    int rowbuf[8] = { 0 };
    Compute1dIDCT(&colidcts[rowidx], 1, rowbuf);
    for (int x = 0; x < 8; ++x) {
      out[rowidx + x] =
          std::max(0, std::min(255, (rowbuf[x] + kRowRound) >> kRowScale));
    }
  }
}

}  // namespace guetzli

Line	Count	Source
1		/*
2		* Copyright 2016 Google Inc.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*/
16
17		// Integer implementation of the Inverse Discrete Cosine Transform (IDCT).
18
19		#include "guetzli/idct.h"
20
21		#include <algorithm>
22		#include <math.h>
23
24		namespace guetzli {
25
26		// kIDCTMatrix[8x+u] = alpha(u)cos((2x+1)uM_PI/16)sqrt(2), with fixed 13
27		// bit precision, where alpha(0) = 1/sqrt(2) and alpha(u) = 1 for u > 0.
28		// Some coefficients are off by +-1 to mimick libjpeg's behaviour.
29		static const int kIDCTMatrix[kDCTBlockSize] = {
30		8192, 11363, 10703, 9633, 8192, 6437, 4433, 2260,
31		8192, 9633, 4433, -2259, -8192, -11362, -10704, -6436,
32		8192, 6437, -4433, -11362, -8192, 2261, 10704, 9633,
33		8192, 2260, -10703, -6436, 8192, 9633, -4433, -11363,
34		8192, -2260, -10703, 6436, 8192, -9633, -4433, 11363,
35		8192, -6437, -4433, 11362, -8192, -2261, 10704, -9633,
36		8192, -9633, 4433, 2259, -8192, 11362, -10704, 6436,
37		8192, -11363, 10703, -9633, 8192, -6437, 4433, -2260,
38		};
39
40		// Computes out[x] = sum{kIDCTMatrix[8x+u]in[u*stride]; for u in [0..7]}
41	267M	inline void Compute1dIDCT(const coeff_t* in, const int stride, int out[8]) {
42	267M	int tmp0, tmp1, tmp2, tmp3, tmp4;
43
44	267M	tmp1 = kIDCTMatrix[0] * in[0];
45	267M	out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = tmp1;
46
47	267M	tmp0 = in[stride];
48	267M	tmp1 = kIDCTMatrix[ 1] * tmp0;
49	267M	tmp2 = kIDCTMatrix[ 9] * tmp0;
50	267M	tmp3 = kIDCTMatrix[17] * tmp0;
51	267M	tmp4 = kIDCTMatrix[25] * tmp0;
52	267M	out[0] += tmp1;
53	267M	out[1] += tmp2;
54	267M	out[2] += tmp3;
55	267M	out[3] += tmp4;
56	267M	out[4] -= tmp4;
57	267M	out[5] -= tmp3;
58	267M	out[6] -= tmp2;
59	267M	out[7] -= tmp1;
60
61	267M	tmp0 = in[2 * stride];
62	267M	tmp1 = kIDCTMatrix[ 2] * tmp0;
63	267M	tmp2 = kIDCTMatrix[10] * tmp0;
64	267M	out[0] += tmp1;
65	267M	out[1] += tmp2;
66	267M	out[2] -= tmp2;
67	267M	out[3] -= tmp1;
68	267M	out[4] -= tmp1;
69	267M	out[5] -= tmp2;
70	267M	out[6] += tmp2;
71	267M	out[7] += tmp1;
72
73	267M	tmp0 = in[3 * stride];
74	267M	tmp1 = kIDCTMatrix[ 3] * tmp0;
75	267M	tmp2 = kIDCTMatrix[11] * tmp0;
76	267M	tmp3 = kIDCTMatrix[19] * tmp0;
77	267M	tmp4 = kIDCTMatrix[27] * tmp0;
78	267M	out[0] += tmp1;
79	267M	out[1] += tmp2;
80	267M	out[2] += tmp3;
81	267M	out[3] += tmp4;
82	267M	out[4] -= tmp4;
83	267M	out[5] -= tmp3;
84	267M	out[6] -= tmp2;
85	267M	out[7] -= tmp1;
86
87	267M	tmp0 = in[4 * stride];
88	267M	tmp1 = kIDCTMatrix[ 4] * tmp0;
89	267M	out[0] += tmp1;
90	267M	out[1] -= tmp1;
91	267M	out[2] -= tmp1;
92	267M	out[3] += tmp1;
93	267M	out[4] += tmp1;
94	267M	out[5] -= tmp1;
95	267M	out[6] -= tmp1;
96	267M	out[7] += tmp1;
97
98	267M	tmp0 = in[5 * stride];
99	267M	tmp1 = kIDCTMatrix[ 5] * tmp0;
100	267M	tmp2 = kIDCTMatrix[13] * tmp0;
101	267M	tmp3 = kIDCTMatrix[21] * tmp0;
102	267M	tmp4 = kIDCTMatrix[29] * tmp0;
103	267M	out[0] += tmp1;
104	267M	out[1] += tmp2;
105	267M	out[2] += tmp3;
106	267M	out[3] += tmp4;
107	267M	out[4] -= tmp4;
108	267M	out[5] -= tmp3;
109	267M	out[6] -= tmp2;
110	267M	out[7] -= tmp1;
111
112	267M	tmp0 = in[6 * stride];
113	267M	tmp1 = kIDCTMatrix[ 6] * tmp0;
114	267M	tmp2 = kIDCTMatrix[14] * tmp0;
115	267M	out[0] += tmp1;
116	267M	out[1] += tmp2;
117	267M	out[2] -= tmp2;
118	267M	out[3] -= tmp1;
119	267M	out[4] -= tmp1;
120	267M	out[5] -= tmp2;
121	267M	out[6] += tmp2;
122	267M	out[7] += tmp1;
123
124	267M	tmp0 = in[7 * stride];
125	267M	tmp1 = kIDCTMatrix[ 7] * tmp0;
126	267M	tmp2 = kIDCTMatrix[15] * tmp0;
127	267M	tmp3 = kIDCTMatrix[23] * tmp0;
128	267M	tmp4 = kIDCTMatrix[31] * tmp0;
129	267M	out[0] += tmp1;
130	267M	out[1] += tmp2;
131	267M	out[2] += tmp3;
132	267M	out[3] += tmp4;
133	267M	out[4] -= tmp4;
134	267M	out[5] -= tmp3;
135	267M	out[6] -= tmp2;
136	267M	out[7] -= tmp1;
137	267M	}
138
139	16.7M	void ComputeBlockIDCT(const coeff_t* block, uint8_t* out) {
140	16.7M	coeff_t colidcts[kDCTBlockSize];
141	16.7M	const int kColScale = 11;
142	16.7M	const int kColRound = 1 << (kColScale - 1);
143	150M	for (int x = 0; x < 8; ++x) {
144	133M	int colbuf[8] = { 0 };
145	133M	Compute1dIDCT(&block[x], 8, colbuf);
146	1.20G	for (int y = 0; y < 8; ++y) {
147	1.07G	colidcts[8 * y + x] = (colbuf[y] + kColRound) >> kColScale;
148	1.07G	}
149	133M	}
150	16.7M	const int kRowScale = 18;
151	16.7M	const int kRowRound = 257 << (kRowScale - 1); // includes offset by 128
152	150M	for (int y = 0; y < 8; ++y) {
153	133M	const int rowidx = 8 * y;
154	133M	int rowbuf[8] = { 0 };
155	133M	Compute1dIDCT(&colidcts[rowidx], 1, rowbuf);
156	1.20G	for (int x = 0; x < 8; ++x) {
157	1.07G	out[rowidx + x] =
158	1.07G	std::max(0, std::min(255, (rowbuf[x] + kRowRound) >> kRowScale));
159	1.07G	}
160	133M	}
161	16.7M	}
162
163		} // namespace guetzli

Coverage Report

Created: 2025-08-03 06:52