/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jpeg-encoder-0.6.1/src/fdct.rs
Line | Count | Source |
1 | | /* |
2 | | * Ported from mozjpeg to rust |
3 | | * |
4 | | * This file was part of the Independent JPEG Group's software: |
5 | | * Copyright (C) 1991-1996, Thomas G. Lane. |
6 | | * libjpeg-turbo Modifications: |
7 | | * Copyright (C) 2015, 2020, D. R. Commander. |
8 | | * |
9 | | * Conditions of distribution and use: |
10 | | * In plain English: |
11 | | * |
12 | | * 1. We don't promise that this software works. (But if you find any bugs, |
13 | | * please let us know!) |
14 | | * 2. You can use this software for whatever you want. You don't have to pay us. |
15 | | * 3. You may not pretend that you wrote this software. If you use it in a |
16 | | * program, you must acknowledge somewhere in your documentation that |
17 | | * you've used the IJG code. |
18 | | * |
19 | | * In legalese: |
20 | | * |
21 | | * The authors make NO WARRANTY or representation, either express or implied, |
22 | | * with respect to this software, its quality, accuracy, merchantability, or |
23 | | * fitness for a particular purpose. This software is provided "AS IS", and you, |
24 | | * its user, assume the entire risk as to its quality and accuracy. |
25 | | * |
26 | | * This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. |
27 | | * All Rights Reserved except as specified below. |
28 | | * |
29 | | * Permission is hereby granted to use, copy, modify, and distribute this |
30 | | * software (or portions thereof) for any purpose, without fee, subject to these |
31 | | * conditions: |
32 | | * (1) If any part of the source code for this software is distributed, then this |
33 | | * README file must be included, with this copyright and no-warranty notice |
34 | | * unaltered; and any additions, deletions, or changes to the original files |
35 | | * must be clearly indicated in accompanying documentation. |
36 | | * (2) If only executable code is distributed, then the accompanying |
37 | | * documentation must state that "this software is based in part on the work of |
38 | | * the Independent JPEG Group". |
39 | | * (3) Permission for use of this software is granted only if the user accepts |
40 | | * full responsibility for any undesirable consequences; the authors accept |
41 | | * NO LIABILITY for damages of any kind. |
42 | | * |
43 | | * These conditions apply to any software derived from or based on the IJG code, |
44 | | * not just to the unmodified library. If you use our work, you ought to |
45 | | * acknowledge us. |
46 | | * |
47 | | * Permission is NOT granted for the use of any IJG author's name or company name |
48 | | * in advertising or publicity relating to this software or products derived from |
49 | | * it. This software may be referred to only as "the Independent JPEG Group's |
50 | | * software". |
51 | | * |
52 | | * We specifically permit and encourage the use of this software as the basis of |
53 | | * commercial products, provided that all warranty or liability claims are |
54 | | * assumed by the product vendor. |
55 | | * |
56 | | * This file contains a slower but more accurate integer implementation of the |
57 | | * forward DCT (Discrete Cosine Transform). |
58 | | * |
59 | | * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT |
60 | | * on each column. Direct algorithms are also available, but they are |
61 | | * much more complex and seem not to be any faster when reduced to code. |
62 | | * |
63 | | * This implementation is based on an algorithm described in |
64 | | * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT |
65 | | * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, |
66 | | * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. |
67 | | * The primary algorithm described there uses 11 multiplies and 29 adds. |
68 | | * We use their alternate method with 12 multiplies and 32 adds. |
69 | | * The advantage of this method is that no data path contains more than one |
70 | | * multiplication; this allows a very simple and accurate implementation in |
71 | | * scaled fixed-point arithmetic, with a minimal number of shifts. |
72 | | */ |
73 | | |
74 | | const CONST_BITS: i32 = 13; |
75 | | const PASS1_BITS: i32 = 2; |
76 | | |
77 | | const FIX_0_298631336: i32 = 2446; |
78 | | const FIX_0_390180644: i32 = 3196; |
79 | | const FIX_0_541196100: i32 = 4433; |
80 | | const FIX_0_765366865: i32 = 6270; |
81 | | const FIX_0_899976223: i32 = 7373; |
82 | | const FIX_1_175875602: i32 = 9633; |
83 | | const FIX_1_501321110: i32 = 12299; |
84 | | const FIX_1_847759065: i32 = 15137; |
85 | | const FIX_1_961570560: i32 = 16069; |
86 | | const FIX_2_053119869: i32 = 16819; |
87 | | const FIX_2_562915447: i32 = 20995; |
88 | | const FIX_3_072711026: i32 = 25172; |
89 | | |
90 | | const DCT_SIZE: usize = 8; |
91 | | |
92 | | #[inline(always)] |
93 | 0 | fn descale(x: i32, n: i32) -> i32 { |
94 | | // right shift with rounding |
95 | 0 | (x + (1 << (n - 1))) >> n |
96 | 0 | } |
97 | | |
98 | | #[inline(always)] |
99 | 0 | fn into_el(v: i32) -> i16 { |
100 | 0 | v as i16 |
101 | 0 | } |
102 | | |
103 | | #[allow(clippy::erasing_op)] |
104 | | #[allow(clippy::identity_op)] |
105 | 0 | pub fn fdct(data: &mut [i16; 64]) { |
106 | | /* Pass 1: process rows. */ |
107 | | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ |
108 | | /* furthermore, we scale the results by 2**PASS1_BITS. */ |
109 | | |
110 | 0 | let mut data2 = [0i32; 64]; |
111 | | |
112 | 0 | for y in 0..8 { |
113 | 0 | let offset = y * 8; |
114 | 0 |
|
115 | 0 | let tmp0 = i32::from(data[offset + 0]) + i32::from(data[offset + 7]); |
116 | 0 | let tmp7 = i32::from(data[offset + 0]) - i32::from(data[offset + 7]); |
117 | 0 | let tmp1 = i32::from(data[offset + 1]) + i32::from(data[offset + 6]); |
118 | 0 | let tmp6 = i32::from(data[offset + 1]) - i32::from(data[offset + 6]); |
119 | 0 | let tmp2 = i32::from(data[offset + 2]) + i32::from(data[offset + 5]); |
120 | 0 | let tmp5 = i32::from(data[offset + 2]) - i32::from(data[offset + 5]); |
121 | 0 | let tmp3 = i32::from(data[offset + 3]) + i32::from(data[offset + 4]); |
122 | 0 | let tmp4 = i32::from(data[offset + 3]) - i32::from(data[offset + 4]); |
123 | 0 |
|
124 | 0 | /* Even part per LL&M figure 1 --- note that published figure is faulty; |
125 | 0 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |
126 | 0 | */ |
127 | 0 |
|
128 | 0 | let tmp10 = tmp0 + tmp3; |
129 | 0 | let tmp13 = tmp0 - tmp3; |
130 | 0 | let tmp11 = tmp1 + tmp2; |
131 | 0 | let tmp12 = tmp1 - tmp2; |
132 | 0 |
|
133 | 0 | data2[offset + 0] = (tmp10 + tmp11) << PASS1_BITS; |
134 | 0 | data2[offset + 4] = (tmp10 - tmp11) << PASS1_BITS; |
135 | 0 |
|
136 | 0 | let z1 = (tmp12 + tmp13) * FIX_0_541196100; |
137 | 0 | data2[offset + 2] = descale( |
138 | 0 | z1 + (tmp13 * FIX_0_765366865), |
139 | 0 | CONST_BITS - PASS1_BITS, |
140 | 0 | ); |
141 | 0 | data2[offset + 6] = descale( |
142 | 0 | z1 + (tmp12 * -FIX_1_847759065), |
143 | 0 | CONST_BITS - PASS1_BITS, |
144 | 0 | ); |
145 | 0 |
|
146 | 0 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |
147 | 0 | * cK represents cos(K*pi/16). |
148 | 0 | * i0..i3 in the paper are tmp4..tmp7 here. |
149 | 0 | */ |
150 | 0 |
|
151 | 0 | let z1 = tmp4 + tmp7; |
152 | 0 | let z2 = tmp5 + tmp6; |
153 | 0 | let z3 = tmp4 + tmp6; |
154 | 0 | let z4 = tmp5 + tmp7; |
155 | 0 | let z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ |
156 | 0 |
|
157 | 0 | let tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ |
158 | 0 | let tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ |
159 | 0 | let tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ |
160 | 0 | let tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ |
161 | 0 | let z1 = z1 * -FIX_0_899976223; /* sqrt(2) * ( c7-c3) */ |
162 | 0 | let z2 = z2 * -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */ |
163 | 0 | let z3 = z3 * -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */ |
164 | 0 | let z4 = z4 * -FIX_0_390180644; /* sqrt(2) * ( c5-c3) */ |
165 | 0 |
|
166 | 0 | let z3 = z3 + z5; |
167 | 0 | let z4 = z4 + z5; |
168 | 0 |
|
169 | 0 | data2[offset + 7] = descale(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS); |
170 | 0 | data2[offset + 5] = descale(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS); |
171 | 0 | data2[offset + 3] = descale(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS); |
172 | 0 | data2[offset + 1] = descale(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS); |
173 | 0 | } |
174 | | |
175 | | /* Pass 2: process columns. |
176 | | * We remove the PASS1_BITS scaling, but leave the results scaled up |
177 | | * by an overall factor of 8. |
178 | | */ |
179 | | |
180 | 0 | for x in 0..8 { |
181 | 0 | let tmp0 = data2[DCT_SIZE * 0 + x] + data2[DCT_SIZE * 7 + x]; |
182 | 0 | let tmp7 = data2[DCT_SIZE * 0 + x] - data2[DCT_SIZE * 7 + x]; |
183 | 0 | let tmp1 = data2[DCT_SIZE * 1 + x] + data2[DCT_SIZE * 6 + x]; |
184 | 0 | let tmp6 = data2[DCT_SIZE * 1 + x] - data2[DCT_SIZE * 6 + x]; |
185 | 0 | let tmp2 = data2[DCT_SIZE * 2 + x] + data2[DCT_SIZE * 5 + x]; |
186 | 0 | let tmp5 = data2[DCT_SIZE * 2 + x] - data2[DCT_SIZE * 5 + x]; |
187 | 0 | let tmp3 = data2[DCT_SIZE * 3 + x] + data2[DCT_SIZE * 4 + x]; |
188 | 0 | let tmp4 = data2[DCT_SIZE * 3 + x] - data2[DCT_SIZE * 4 + x]; |
189 | 0 |
|
190 | 0 | /* Even part per LL&M figure 1 --- note that published figure is faulty; |
191 | 0 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |
192 | 0 | */ |
193 | 0 |
|
194 | 0 | let tmp10 = tmp0 + tmp3; |
195 | 0 | let tmp13 = tmp0 - tmp3; |
196 | 0 | let tmp11 = tmp1 + tmp2; |
197 | 0 | let tmp12 = tmp1 - tmp2; |
198 | 0 |
|
199 | 0 | data[DCT_SIZE * 0 + x] = into_el(descale(tmp10 + tmp11, PASS1_BITS)); |
200 | 0 | data[DCT_SIZE * 4 + x] = into_el(descale(tmp10 - tmp11, PASS1_BITS)); |
201 | 0 |
|
202 | 0 | let z1 = (tmp12 + tmp13) * FIX_0_541196100; |
203 | 0 | data[DCT_SIZE * 2 + x] = into_el(descale( |
204 | 0 | z1 + tmp13 * FIX_0_765366865, |
205 | 0 | CONST_BITS + PASS1_BITS, |
206 | 0 | )); |
207 | 0 | data[DCT_SIZE * 6 + x] = into_el(descale( |
208 | 0 | z1 + tmp12 * -FIX_1_847759065, |
209 | 0 | CONST_BITS + PASS1_BITS, |
210 | 0 | )); |
211 | 0 |
|
212 | 0 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |
213 | 0 | * cK represents cos(K*pi/16). |
214 | 0 | * i0..i3 in the paper are tmp4..tmp7 here. |
215 | 0 | */ |
216 | 0 |
|
217 | 0 | let z1 = tmp4 + tmp7; |
218 | 0 | let z2 = tmp5 + tmp6; |
219 | 0 | let z3 = tmp4 + tmp6; |
220 | 0 | let z4 = tmp5 + tmp7; |
221 | 0 | let z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */ |
222 | 0 |
|
223 | 0 | let tmp4 = tmp4 * FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */ |
224 | 0 | let tmp5 = tmp5 * FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */ |
225 | 0 | let tmp6 = tmp6 * FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */ |
226 | 0 | let tmp7 = tmp7 * FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */ |
227 | 0 | let z1 = z1 * -FIX_0_899976223; /* sqrt(2) * ( c7-c3) */ |
228 | 0 | let z2 = z2 * -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */ |
229 | 0 | let z3 = z3 * -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */ |
230 | 0 | let z4 = z4 * -FIX_0_390180644; /* sqrt(2) * ( c5-c3) */ |
231 | 0 |
|
232 | 0 | let z3 = z3 + z5; |
233 | 0 | let z4 = z4 + z5; |
234 | 0 |
|
235 | 0 | data[DCT_SIZE * 7 + x] = into_el(descale(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS)); |
236 | 0 | data[DCT_SIZE * 5 + x] = into_el(descale(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS)); |
237 | 0 | data[DCT_SIZE * 3 + x] = into_el(descale(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS)); |
238 | 0 | data[DCT_SIZE * 1 + x] = into_el(descale(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS)); |
239 | 0 | } |
240 | 0 | } |
241 | | |
242 | | #[cfg(test)] |
243 | | mod tests { |
244 | | |
245 | | // Inputs and outputs are taken from libjpegs jpeg_fdct_islow for a typical image |
246 | | |
247 | | use super::fdct; |
248 | | |
249 | | const INPUT1: [i16; 64] = [ |
250 | | -70, -71, -70, -68, -67, -67, -67, -67, -72, -73, -72, -70, -69, -69, -68, -69, -75, -76, |
251 | | -74, -73, -73, -72, -71, -70, -77, -78, -77, -75, -76, -75, -73, -71, -78, -77, -77, -76, |
252 | | -79, -77, -76, -75, -78, -78, -77, -77, -77, -77, -78, -77, -79, -79, -78, -78, -78, -78, |
253 | | -79, -78, -80, -79, -78, -78, -81, -80, -78, -76, |
254 | | ]; |
255 | | |
256 | | const OUTPUT1: [i16; 64] = [ |
257 | | -4786, -66, 2, -18, 12, 12, 5, -7, 223, -37, -8, 21, 8, 5, -4, 6, 60, 6, -10, 5, 0, -2, -1, |
258 | | 5, 21, 21, -15, 12, -2, -7, 1, 0, -2, -5, 16, -15, 0, 5, -4, -8, 0, -7, -4, 6, 7, -4, 5, 4, |
259 | | 3, 0, 1, -5, 0, -1, 4, 1, -5, 7, 0, -3, -6, 1, 1, -4, |
260 | | ]; |
261 | | |
262 | | const INPUT2: [i16; 64] = [ |
263 | | 21, 28, 11, 24, -45, -37, -55, -103, 38, -8, 31, 17, -19, 49, 15, -76, 22, -48, -36, -31, |
264 | | -23, 35, -23, -72, 13, -30, -45, -42, -44, -15, -20, -44, 13, -30, -45, -42, -44, -15, -20, |
265 | | -44, 13, -30, -45, -42, -44, -15, -20, -44, 13, -30, -45, -42, -44, -15, -20, -44, 13, -30, |
266 | | -45, -42, -44, -15, -20, -44, |
267 | | ]; |
268 | | |
269 | | const OUTPUT2: [i16; 64] = [ |
270 | | -1420, 717, 187, 910, -244, 579, 222, -191, 461, 487, -497, -29, -220, 179, 63, -95, 213, |
271 | | 414, -235, -187, -108, 74, -73, -70, -63, 311, 13, -290, 17, -38, -180, -47, -254, 201, |
272 | | 116, -247, 102, -109, -185, -36, -310, 107, 73, -91, 126, -121, -99, -37, -253, 43, -15, |
273 | | 53, 101, -91, -3, -37, -136, 12, -44, 81, 53, -45, 31, -24, |
274 | | ]; |
275 | | |
276 | | #[test] |
277 | | pub fn test_fdct_libjpeg() { |
278 | | let mut i1 = INPUT1.clone(); |
279 | | fdct(&mut i1); |
280 | | assert_eq!(i1, OUTPUT1); |
281 | | |
282 | | let mut i2 = INPUT2.clone(); |
283 | | fdct(&mut i2); |
284 | | assert_eq!(i2, OUTPUT2); |
285 | | } |
286 | | } |