/src/guetzli/guetzli/idct.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2016 Google Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | // Integer implementation of the Inverse Discrete Cosine Transform (IDCT). |
18 | | |
19 | | #include "guetzli/idct.h" |
20 | | |
21 | | #include <algorithm> |
22 | | #include <math.h> |
23 | | |
24 | | namespace guetzli { |
25 | | |
26 | | // kIDCTMatrix[8*x+u] = alpha(u)*cos((2*x+1)*u*M_PI/16)*sqrt(2), with fixed 13 |
27 | | // bit precision, where alpha(0) = 1/sqrt(2) and alpha(u) = 1 for u > 0. |
28 | | // Some coefficients are off by +-1 to mimick libjpeg's behaviour. |
29 | | static const int kIDCTMatrix[kDCTBlockSize] = { |
30 | | 8192, 11363, 10703, 9633, 8192, 6437, 4433, 2260, |
31 | | 8192, 9633, 4433, -2259, -8192, -11362, -10704, -6436, |
32 | | 8192, 6437, -4433, -11362, -8192, 2261, 10704, 9633, |
33 | | 8192, 2260, -10703, -6436, 8192, 9633, -4433, -11363, |
34 | | 8192, -2260, -10703, 6436, 8192, -9633, -4433, 11363, |
35 | | 8192, -6437, -4433, 11362, -8192, -2261, 10704, -9633, |
36 | | 8192, -9633, 4433, 2259, -8192, 11362, -10704, 6436, |
37 | | 8192, -11363, 10703, -9633, 8192, -6437, 4433, -2260, |
38 | | }; |
39 | | |
40 | | // Computes out[x] = sum{kIDCTMatrix[8*x+u]*in[u*stride]; for u in [0..7]} |
41 | 267M | inline void Compute1dIDCT(const coeff_t* in, const int stride, int out[8]) { |
42 | 267M | int tmp0, tmp1, tmp2, tmp3, tmp4; |
43 | | |
44 | 267M | tmp1 = kIDCTMatrix[0] * in[0]; |
45 | 267M | out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = tmp1; |
46 | | |
47 | 267M | tmp0 = in[stride]; |
48 | 267M | tmp1 = kIDCTMatrix[ 1] * tmp0; |
49 | 267M | tmp2 = kIDCTMatrix[ 9] * tmp0; |
50 | 267M | tmp3 = kIDCTMatrix[17] * tmp0; |
51 | 267M | tmp4 = kIDCTMatrix[25] * tmp0; |
52 | 267M | out[0] += tmp1; |
53 | 267M | out[1] += tmp2; |
54 | 267M | out[2] += tmp3; |
55 | 267M | out[3] += tmp4; |
56 | 267M | out[4] -= tmp4; |
57 | 267M | out[5] -= tmp3; |
58 | 267M | out[6] -= tmp2; |
59 | 267M | out[7] -= tmp1; |
60 | | |
61 | 267M | tmp0 = in[2 * stride]; |
62 | 267M | tmp1 = kIDCTMatrix[ 2] * tmp0; |
63 | 267M | tmp2 = kIDCTMatrix[10] * tmp0; |
64 | 267M | out[0] += tmp1; |
65 | 267M | out[1] += tmp2; |
66 | 267M | out[2] -= tmp2; |
67 | 267M | out[3] -= tmp1; |
68 | 267M | out[4] -= tmp1; |
69 | 267M | out[5] -= tmp2; |
70 | 267M | out[6] += tmp2; |
71 | 267M | out[7] += tmp1; |
72 | | |
73 | 267M | tmp0 = in[3 * stride]; |
74 | 267M | tmp1 = kIDCTMatrix[ 3] * tmp0; |
75 | 267M | tmp2 = kIDCTMatrix[11] * tmp0; |
76 | 267M | tmp3 = kIDCTMatrix[19] * tmp0; |
77 | 267M | tmp4 = kIDCTMatrix[27] * tmp0; |
78 | 267M | out[0] += tmp1; |
79 | 267M | out[1] += tmp2; |
80 | 267M | out[2] += tmp3; |
81 | 267M | out[3] += tmp4; |
82 | 267M | out[4] -= tmp4; |
83 | 267M | out[5] -= tmp3; |
84 | 267M | out[6] -= tmp2; |
85 | 267M | out[7] -= tmp1; |
86 | | |
87 | 267M | tmp0 = in[4 * stride]; |
88 | 267M | tmp1 = kIDCTMatrix[ 4] * tmp0; |
89 | 267M | out[0] += tmp1; |
90 | 267M | out[1] -= tmp1; |
91 | 267M | out[2] -= tmp1; |
92 | 267M | out[3] += tmp1; |
93 | 267M | out[4] += tmp1; |
94 | 267M | out[5] -= tmp1; |
95 | 267M | out[6] -= tmp1; |
96 | 267M | out[7] += tmp1; |
97 | | |
98 | 267M | tmp0 = in[5 * stride]; |
99 | 267M | tmp1 = kIDCTMatrix[ 5] * tmp0; |
100 | 267M | tmp2 = kIDCTMatrix[13] * tmp0; |
101 | 267M | tmp3 = kIDCTMatrix[21] * tmp0; |
102 | 267M | tmp4 = kIDCTMatrix[29] * tmp0; |
103 | 267M | out[0] += tmp1; |
104 | 267M | out[1] += tmp2; |
105 | 267M | out[2] += tmp3; |
106 | 267M | out[3] += tmp4; |
107 | 267M | out[4] -= tmp4; |
108 | 267M | out[5] -= tmp3; |
109 | 267M | out[6] -= tmp2; |
110 | 267M | out[7] -= tmp1; |
111 | | |
112 | 267M | tmp0 = in[6 * stride]; |
113 | 267M | tmp1 = kIDCTMatrix[ 6] * tmp0; |
114 | 267M | tmp2 = kIDCTMatrix[14] * tmp0; |
115 | 267M | out[0] += tmp1; |
116 | 267M | out[1] += tmp2; |
117 | 267M | out[2] -= tmp2; |
118 | 267M | out[3] -= tmp1; |
119 | 267M | out[4] -= tmp1; |
120 | 267M | out[5] -= tmp2; |
121 | 267M | out[6] += tmp2; |
122 | 267M | out[7] += tmp1; |
123 | | |
124 | 267M | tmp0 = in[7 * stride]; |
125 | 267M | tmp1 = kIDCTMatrix[ 7] * tmp0; |
126 | 267M | tmp2 = kIDCTMatrix[15] * tmp0; |
127 | 267M | tmp3 = kIDCTMatrix[23] * tmp0; |
128 | 267M | tmp4 = kIDCTMatrix[31] * tmp0; |
129 | 267M | out[0] += tmp1; |
130 | 267M | out[1] += tmp2; |
131 | 267M | out[2] += tmp3; |
132 | 267M | out[3] += tmp4; |
133 | 267M | out[4] -= tmp4; |
134 | 267M | out[5] -= tmp3; |
135 | 267M | out[6] -= tmp2; |
136 | 267M | out[7] -= tmp1; |
137 | 267M | } |
138 | | |
139 | 16.7M | void ComputeBlockIDCT(const coeff_t* block, uint8_t* out) { |
140 | 16.7M | coeff_t colidcts[kDCTBlockSize]; |
141 | 16.7M | const int kColScale = 11; |
142 | 16.7M | const int kColRound = 1 << (kColScale - 1); |
143 | 150M | for (int x = 0; x < 8; ++x) { |
144 | 133M | int colbuf[8] = { 0 }; |
145 | 133M | Compute1dIDCT(&block[x], 8, colbuf); |
146 | 1.20G | for (int y = 0; y < 8; ++y) { |
147 | 1.07G | colidcts[8 * y + x] = (colbuf[y] + kColRound) >> kColScale; |
148 | 1.07G | } |
149 | 133M | } |
150 | 16.7M | const int kRowScale = 18; |
151 | 16.7M | const int kRowRound = 257 << (kRowScale - 1); // includes offset by 128 |
152 | 150M | for (int y = 0; y < 8; ++y) { |
153 | 133M | const int rowidx = 8 * y; |
154 | 133M | int rowbuf[8] = { 0 }; |
155 | 133M | Compute1dIDCT(&colidcts[rowidx], 1, rowbuf); |
156 | 1.20G | for (int x = 0; x < 8; ++x) { |
157 | 1.07G | out[rowidx + x] = |
158 | 1.07G | std::max(0, std::min(255, (rowbuf[x] + kRowRound) >> kRowScale)); |
159 | 1.07G | } |
160 | 133M | } |
161 | 16.7M | } |
162 | | |
163 | | } // namespace guetzli |