Coverage Report

Created: 2023-06-07 06:21

/src/guetzli/guetzli/fdct.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2016 Google Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 * http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
// Integer implementation of the Discrete Cosine Transform (DCT)
18
//
19
// Note! DCT output is kept scaled by 16, to retain maximum 16bit precision
20
21
#include "guetzli/fdct.h"
22
23
namespace guetzli {
24
25
namespace {
26
27
///////////////////////////////////////////////////////////////////////////////
28
// Cosine table: C(k) = cos(k.pi/16)/sqrt(2), k = 1..7 using 15 bits signed
29
const coeff_t kTable04[7] = { 22725, 21407, 19266, 16384, 12873,  8867, 4520 };
30
// rows #1 and #7 are pre-multiplied by 2.C(1) before the 2nd pass.
31
// This multiply is merged in the table of constants used during 1st pass:
32
const coeff_t kTable17[7] = { 31521, 29692, 26722, 22725, 17855, 12299, 6270 };
33
// rows #2 and #6 are pre-multiplied by 2.C(2):
34
const coeff_t kTable26[7] = { 29692, 27969, 25172, 21407, 16819, 11585, 5906 };
35
// rows #3 and #5 are pre-multiplied by 2.C(3):
36
const coeff_t kTable35[7] = { 26722, 25172, 22654, 19266, 15137, 10426, 5315 };
37
38
///////////////////////////////////////////////////////////////////////////////
39
// Constants (15bit precision) and C macros for IDCT vertical pass
40
41
#define kTan1   (13036)   // = tan(pi/16)
42
#define kTan2   (27146)   // = tan(2.pi/16) = sqrt(2) - 1.
43
#define kTan3m1 (-21746)  // = tan(3.pi/16) - 1
44
#define k2Sqrt2 (23170)   // = 1 / 2.sqrt(2)
45
46
  // performs: {a,b} <- {a-b, a+b}, without saturation
47
0
#define BUTTERFLY(a, b) do {   \
48
0
  SUB((a), (b));               \
49
0
  ADD((b), (b));               \
50
0
  ADD((b), (a));               \
51
0
} while (0)
52
53
///////////////////////////////////////////////////////////////////////////////
54
// Constants for DCT horizontal pass
55
56
// Note about the CORRECT_LSB macro:
57
// using 16bit fixed-point constants, we often compute products like:
58
// p = (A*x + B*y + 32768) >> 16 by adding two sub-terms q = (A*x) >> 16
59
// and r = (B*y) >> 16 together. Statistically, we have p = q + r + 1
60
// in 3/4 of the cases. This can be easily seen from the relation:
61
//   (a + b + 1) >> 1 = (a >> 1) + (b >> 1) + ((a|b)&1)
62
// The approximation we are doing is replacing ((a|b)&1) by 1.
63
// In practice, this is a slightly more involved because the constants A and B
64
// have also been rounded compared to their exact floating point value.
65
// However, all in all the correction is quite small, and CORRECT_LSB can
66
// be defined empty if needed.
67
68
0
#define COLUMN_DCT8(in) do { \
69
0
  LOAD(m0, (in)[0 * 8]);     \
70
0
  LOAD(m2, (in)[2 * 8]);     \
71
0
  LOAD(m7, (in)[7 * 8]);     \
72
0
  LOAD(m5, (in)[5 * 8]);     \
73
0
                             \
74
0
  BUTTERFLY(m0, m7);         \
75
0
  BUTTERFLY(m2, m5);         \
76
0
                             \
77
0
  LOAD(m3, (in)[3 * 8]);     \
78
0
  LOAD(m4, (in)[4 * 8]);     \
79
0
  BUTTERFLY(m3, m4);         \
80
0
                             \
81
0
  LOAD(m6, (in)[6 * 8]);     \
82
0
  LOAD(m1, (in)[1 * 8]);     \
83
0
  BUTTERFLY(m1, m6);         \
84
0
  BUTTERFLY(m7, m4);         \
85
0
  BUTTERFLY(m6, m5);         \
86
0
                             \
87
0
  /* RowIdct() needs 15bits fixed-point input, when the output from   */ \
88
0
  /* ColumnIdct() would be 12bits. We are better doing the shift by 3 */ \
89
0
  /* now instead of in RowIdct(), because we have some multiplies to  */ \
90
0
  /* perform, that can take advantage of the extra 3bits precision.   */ \
91
0
  LSHIFT(m4, 3);             \
92
0
  LSHIFT(m5, 3);             \
93
0
  BUTTERFLY(m4, m5);         \
94
0
  STORE16((in)[0 * 8], m5);  \
95
0
  STORE16((in)[4 * 8], m4);  \
96
0
                             \
97
0
  LSHIFT(m7, 3);             \
98
0
  LSHIFT(m6, 3);             \
99
0
  LSHIFT(m3, 3);             \
100
0
  LSHIFT(m0, 3);             \
101
0
                             \
102
0
  LOAD_CST(m4, kTan2);       \
103
0
  m5 = m4;                   \
104
0
  MULT(m4, m7);              \
105
0
  MULT(m5, m6);              \
106
0
  SUB(m4, m6);               \
107
0
  ADD(m5, m7);               \
108
0
  STORE16((in)[2 * 8], m5);  \
109
0
  STORE16((in)[6 * 8], m4);  \
110
0
                             \
111
0
  /* We should be multiplying m6 by C4 = 1/sqrt(2) here, but we only have */ \
112
0
  /* the k2Sqrt2 = 1/(2.sqrt(2)) constant that fits into 15bits. So we    */ \
113
0
  /* shift by 4 instead of 3 to compensate for the additional 1/2 factor. */ \
114
0
  LOAD_CST(m6, k2Sqrt2);     \
115
0
  LSHIFT(m2, 3 + 1);         \
116
0
  LSHIFT(m1, 3 + 1);         \
117
0
  BUTTERFLY(m1, m2);         \
118
0
  MULT(m2, m6);              \
119
0
  MULT(m1, m6);              \
120
0
  BUTTERFLY(m3, m1);         \
121
0
  BUTTERFLY(m0, m2);         \
122
0
                             \
123
0
  LOAD_CST(m4, kTan3m1);     \
124
0
  LOAD_CST(m5, kTan1);       \
125
0
  m7 = m3;                   \
126
0
  m6 = m1;                   \
127
0
  MULT(m3, m4);              \
128
0
  MULT(m1, m5);              \
129
0
                             \
130
0
  ADD(m3, m7);               \
131
0
  ADD(m1, m2);               \
132
0
  CORRECT_LSB(m1);           \
133
0
  CORRECT_LSB(m3);           \
134
0
  MULT(m4, m0);              \
135
0
  MULT(m5, m2);              \
136
0
  ADD(m4, m0);               \
137
0
  SUB(m0, m3);               \
138
0
  ADD(m7, m4);               \
139
0
  SUB(m5, m6);               \
140
0
                             \
141
0
  STORE16((in)[1 * 8], m1);  \
142
0
  STORE16((in)[3 * 8], m0);  \
143
0
  STORE16((in)[5 * 8], m7);  \
144
0
  STORE16((in)[7 * 8], m5);  \
145
0
} while (0)
146
147
148
// these are the macro required by COLUMN_*
149
0
#define LOAD_CST(dst, src) (dst) = (src)
150
0
#define LOAD(dst, src) (dst) = (src)
151
0
#define MULT(a, b)  (a) = (((a) * (b)) >> 16)
152
0
#define ADD(a, b)   (a) = (a) + (b)
153
0
#define SUB(a, b)   (a) = (a) - (b)
154
0
#define LSHIFT(a, n) (a) = ((a) << (n))
155
0
#define STORE16(a, b) (a) = (b)
156
0
#define CORRECT_LSB(a) (a) += 1
157
158
// DCT vertical pass
159
160
0
inline void ColumnDct(coeff_t* in) {
161
0
  for (int i = 0; i < 8; ++i) {
162
0
    int m0, m1, m2, m3, m4, m5, m6, m7;
163
0
    COLUMN_DCT8(in + i);
164
0
  }
165
0
}
166
167
// DCT horizontal pass
168
169
// We don't really need to round before descaling, since we
170
// still have 4 bits of precision left as final scaled output.
171
0
#define DESCALE(a)  static_cast<coeff_t>((a) >> 16)
172
173
0
void RowDct(coeff_t* in, const coeff_t* table) {
174
  // The Fourier transform is an unitary operator, so we're basically
175
  // doing the transpose of RowIdct()
176
0
  const int a0 = in[0] + in[7];
177
0
  const int b0 = in[0] - in[7];
178
0
  const int a1 = in[1] + in[6];
179
0
  const int b1 = in[1] - in[6];
180
0
  const int a2 = in[2] + in[5];
181
0
  const int b2 = in[2] - in[5];
182
0
  const int a3 = in[3] + in[4];
183
0
  const int b3 = in[3] - in[4];
184
185
  // even part
186
0
  const int C2 = table[1];
187
0
  const int C4 = table[3];
188
0
  const int C6 = table[5];
189
0
  const int c0 = a0 + a3;
190
0
  const int c1 = a0 - a3;
191
0
  const int c2 = a1 + a2;
192
0
  const int c3 = a1 - a2;
193
194
0
  in[0] = DESCALE(C4 * (c0 + c2));
195
0
  in[4] = DESCALE(C4 * (c0 - c2));
196
0
  in[2] = DESCALE(C2 * c1 + C6 * c3);
197
0
  in[6] = DESCALE(C6 * c1 - C2 * c3);
198
199
  // odd part
200
0
  const int C1 = table[0];
201
0
  const int C3 = table[2];
202
0
  const int C5 = table[4];
203
0
  const int C7 = table[6];
204
0
  in[1] = DESCALE(C1 * b0 + C3 * b1 + C5 * b2 + C7 * b3);
205
0
  in[3] = DESCALE(C3 * b0 - C7 * b1 - C1 * b2 - C5 * b3);
206
0
  in[5] = DESCALE(C5 * b0 - C1 * b1 + C7 * b2 + C3 * b3);
207
0
  in[7] = DESCALE(C7 * b0 - C5 * b1 + C3 * b2 - C1 * b3);
208
0
}
209
#undef DESCALE
210
#undef LOAD_CST
211
#undef LOAD
212
#undef MULT
213
#undef ADD
214
#undef SUB
215
#undef LSHIFT
216
#undef STORE16
217
#undef CORRECT_LSB
218
#undef kTan1
219
#undef kTan2
220
#undef kTan3m1
221
#undef k2Sqrt2
222
#undef BUTTERFLY
223
#undef COLUMN_DCT8
224
225
}  // namespace
226
227
///////////////////////////////////////////////////////////////////////////////
228
// visible FDCT callable functions
229
230
0
void ComputeBlockDCT(coeff_t* coeffs) {
231
0
  ColumnDct(coeffs);
232
0
  RowDct(coeffs + 0 * 8, kTable04);
233
0
  RowDct(coeffs + 1 * 8, kTable17);
234
0
  RowDct(coeffs + 2 * 8, kTable26);
235
0
  RowDct(coeffs + 3 * 8, kTable35);
236
0
  RowDct(coeffs + 4 * 8, kTable04);
237
0
  RowDct(coeffs + 5 * 8, kTable35);
238
0
  RowDct(coeffs + 6 * 8, kTable26);
239
0
  RowDct(coeffs + 7 * 8, kTable17);
240
0
}
241
242
}  // namespace guetzli