/src/mpg123/src/libmpg123/dct64.c
Line | Count | Source |
1 | | /* |
2 | | INT123_dct64.c: DCT64, the plain C version |
3 | | |
4 | | copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 |
5 | | see COPYING and AUTHORS files in distribution or http://mpg123.org |
6 | | initially written by Michael Hipp |
7 | | */ |
8 | | |
9 | | /* |
10 | | * Discrete Cosine Transform (DCT) for subband synthesis |
11 | | * |
12 | | * -funroll-loops (for gcc) will remove the loops for better performance |
13 | | * using loops in the source-code enhances readability |
14 | | * |
15 | | * |
16 | | * TODO: write an optimized version for the down-sampling modes |
17 | | * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero) |
18 | | */ |
19 | | |
20 | | #include "mpg123lib_intern.h" |
21 | | |
22 | | void INT123_dct64(real *out0,real *out1,real *samples) /* DCT for subband synthesis: reads samples[0..31], writes 17 values to out0 (indices 0x10*0 .. 0x10*16) and 16 values to out1 (indices 0x10*0 .. 0x10*15) */ |
23 | 0 | { |
24 | 0 | real bufs[64]; /* scratch space used as two 32-element halves; the passes below alternate between them as source and destination */ |
25 | |
|
26 | 0 | { /* ---- five sum/difference passes over groups of 32, 16, 8, 4 and 2 elements ---- */ |
27 | 0 | register int i,j; |
28 | 0 | register real *b1,*b2,*bs; /* b1: ascending read cursor, b2: descending read cursor (pre-decremented), bs: write cursor */ |
29 | 0 | register const real *costab; /* coefficient table for the current pass, taken from INT123_pnts[] */ |
30 | |
|
31 | 0 | b1 = samples; /* pass 1: samples[0..31] -> bufs[0..31] */ |
32 | 0 | bs = bufs; |
33 | 0 | costab = INT123_pnts[0]+16; /* one past the 16 entries used; consumed backwards via *--costab */ |
34 | 0 | b2 = b1 + 32; /* one past the last input sample */ |
35 | |
|
36 | 0 | for(i=15;i>=0;i--) /* 16 sums: samples[k] + samples[31-k] for k = 0..15 */ |
37 | 0 | *bs++ = (*b1++ + *--b2); |
38 | 0 | for(i=15;i>=0;i--) /* 16 scaled differences: b2 keeps descending (15..0) while b1 keeps ascending (16..31) */ |
39 | 0 | *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); /* NOTE(review): the arguments carry side effects; presumably REAL_MUL evaluates each argument exactly once - confirm in its definition */ |
40 | |
|
41 | 0 | b1 = bufs; /* pass 2: bufs[0..31] -> bufs[32..63]; bs was left at bufs+32 by pass 1 */ |
42 | 0 | costab = INT123_pnts[1]+8; /* 8 entries, consumed backwards */ |
43 | 0 | b2 = b1 + 16; /* one past the end of the first 16-element group */ |
44 | |
|
45 | 0 | { |
46 | 0 | for(i=7;i>=0;i--) /* first 16-element group: 8 sums */ |
47 | 0 | *bs++ = (*b1++ + *--b2); |
48 | 0 | for(i=7;i>=0;i--) /* then 8 scaled differences taken as (*--b2 - *b1++) */ |
49 | 0 | *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); |
50 | 0 | b2 += 32; /* b2 walked down 16 elements; +32 lands one past the end of the next 16-element group */ |
51 | 0 | costab += 8; /* rewind so the same 8 coefficients are reused */ |
52 | 0 | for(i=7;i>=0;i--) /* second 16-element group: 8 sums */ |
53 | 0 | *bs++ = (*b1++ + *--b2); |
54 | 0 | for(i=7;i>=0;i--) /* operands swapped relative to the first group: (*b1++ - *--b2) */ |
55 | 0 | *bs++ = REAL_MUL((*b1++ - *--b2), *--costab); |
56 | 0 | b2 += 32; /* b2 is reassigned before its next use */ |
57 | 0 | } |
58 | |
|
59 | 0 | bs = bufs; /* pass 3: bufs[32..63] -> bufs[0..31]; b1 already sits at bufs+32 after pass 2 */ |
60 | 0 | costab = INT123_pnts[2]; /* 4 entries, read as costab[i] with i = 3..0 */ |
61 | 0 | b2 = b1 + 8; /* one past the end of the first 8-element group */ |
62 | |
|
63 | 0 | for(j=2;j;j--) /* each iteration processes two 8-element groups */ |
64 | 0 | { |
65 | 0 | for(i=3;i>=0;i--) |
66 | 0 | *bs++ = (*b1++ + *--b2); |
67 | 0 | for(i=3;i>=0;i--) |
68 | 0 | *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]); |
69 | 0 | b2 += 16; /* move to one past the end of the next 8-element group */ |
70 | 0 | for(i=3;i>=0;i--) |
71 | 0 | *bs++ = (*b1++ + *--b2); |
72 | 0 | for(i=3;i>=0;i--) /* second group of the pair: difference operands swapped */ |
73 | 0 | *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]); |
74 | 0 | b2 += 16; |
75 | 0 | } |
76 | |
|
77 | 0 | b1 = bufs; /* pass 4: bufs[0..31] -> bufs[32..63]; bs was left at bufs+32 by pass 3 */ |
78 | 0 | costab = INT123_pnts[3]; /* 2 entries: costab[1] and costab[0] */ |
79 | 0 | b2 = b1 + 4; /* one past the end of the first 4-element group */ |
80 | |
|
81 | 0 | for(j=4;j;j--) /* each iteration processes two 4-element groups, written out without inner loops */ |
82 | 0 | { |
83 | 0 | *bs++ = (*b1++ + *--b2); |
84 | 0 | *bs++ = (*b1++ + *--b2); |
85 | 0 | *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]); |
86 | 0 | *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]); |
87 | 0 | b2 += 8; /* move to one past the end of the next 4-element group */ |
88 | 0 | *bs++ = (*b1++ + *--b2); |
89 | 0 | *bs++ = (*b1++ + *--b2); |
90 | 0 | *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]); /* second group of the pair: difference operands swapped */ |
91 | 0 | *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]); |
92 | 0 | b2 += 8; |
93 | 0 | } |
94 | 0 | bs = bufs; /* pass 5: bufs[32..63] -> bufs[0..31]; b1 already sits at bufs+32 after pass 4 */ |
95 | 0 | costab = INT123_pnts[4]; /* only the first entry (*costab) is used */ |
96 | |
|
97 | 0 | for(j=8;j;j--) /* each iteration processes two adjacent pairs */ |
98 | 0 | { |
99 | 0 | real v0,v1; |
100 | 0 | v0=*b1++; v1 = *b1++; |
101 | 0 | *bs++ = (v0 + v1); |
102 | 0 | *bs++ = REAL_MUL((v0 - v1), (*costab)); |
103 | 0 | v0=*b1++; v1 = *b1++; |
104 | 0 | *bs++ = (v0 + v1); |
105 | 0 | *bs++ = REAL_MUL((v1 - v0), (*costab)); /* second pair: difference taken with the opposite sign (v1 - v0) */ |
106 | 0 | } |
107 | |
|
108 | 0 | } |
109 | | |
110 | |
|
111 | 0 | { /* ---- in-place accumulation over bufs[0..31]; statement order matters: each right-hand element is read before it is itself updated within the same loop ---- */ |
112 | 0 | register real *b1; |
113 | 0 | register int i; |
114 | |
|
115 | 0 | for(b1=bufs,i=8;i;i--,b1+=4) /* 8 groups of 4 elements */ |
116 | 0 | b1[2] += b1[3]; |
117 | |
|
118 | 0 | for(b1=bufs,i=4;i;i--,b1+=8) /* 4 groups of 8 elements; only the upper four of each group are touched */ |
119 | 0 | { |
120 | 0 | b1[4] += b1[6]; |
121 | 0 | b1[6] += b1[5]; |
122 | 0 | b1[5] += b1[7]; |
123 | 0 | } |
124 | |
|
125 | 0 | for(b1=bufs,i=2;i;i--,b1+=16) /* 2 groups of 16 elements; only the upper eight of each group are touched */ |
126 | 0 | { |
127 | 0 | b1[8] += b1[12]; |
128 | 0 | b1[12] += b1[10]; |
129 | 0 | b1[10] += b1[14]; |
130 | 0 | b1[14] += b1[9]; |
131 | 0 | b1[9] += b1[13]; |
132 | 0 | b1[13] += b1[11]; |
133 | 0 | b1[11] += b1[15]; |
134 | 0 | } |
135 | 0 | } |
136 | | |
137 | |
|
138 | 0 | out0[0x10*16] = REAL_SCALE_DCT64(bufs[0]); /* out0 is filled from index 0x10*16 down to 0: even multiples take one element of bufs[0..15], odd multiples add two elements of bufs[16..31]; every value passes through REAL_SCALE_DCT64 (defined elsewhere) */ |
139 | 0 | out0[0x10*15] = REAL_SCALE_DCT64(bufs[16+0] + bufs[16+8]); |
140 | 0 | out0[0x10*14] = REAL_SCALE_DCT64(bufs[8]); |
141 | 0 | out0[0x10*13] = REAL_SCALE_DCT64(bufs[16+8] + bufs[16+4]); |
142 | 0 | out0[0x10*12] = REAL_SCALE_DCT64(bufs[4]); |
143 | 0 | out0[0x10*11] = REAL_SCALE_DCT64(bufs[16+4] + bufs[16+12]); |
144 | 0 | out0[0x10*10] = REAL_SCALE_DCT64(bufs[12]); |
145 | 0 | out0[0x10* 9] = REAL_SCALE_DCT64(bufs[16+12] + bufs[16+2]); |
146 | 0 | out0[0x10* 8] = REAL_SCALE_DCT64(bufs[2]); |
147 | 0 | out0[0x10* 7] = REAL_SCALE_DCT64(bufs[16+2] + bufs[16+10]); |
148 | 0 | out0[0x10* 6] = REAL_SCALE_DCT64(bufs[10]); |
149 | 0 | out0[0x10* 5] = REAL_SCALE_DCT64(bufs[16+10] + bufs[16+6]); |
150 | 0 | out0[0x10* 4] = REAL_SCALE_DCT64(bufs[6]); |
151 | 0 | out0[0x10* 3] = REAL_SCALE_DCT64(bufs[16+6] + bufs[16+14]); |
152 | 0 | out0[0x10* 2] = REAL_SCALE_DCT64(bufs[14]); |
153 | 0 | out0[0x10* 1] = REAL_SCALE_DCT64(bufs[16+14] + bufs[16+1]); |
154 | 0 | out0[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); |
155 | |
|
156 | 0 | out1[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); /* same source element as out0[0]; out1 is filled upwards from index 0 to 0x10*15 */ |
157 | 0 | out1[0x10* 1] = REAL_SCALE_DCT64(bufs[16+1] + bufs[16+9]); |
158 | 0 | out1[0x10* 2] = REAL_SCALE_DCT64(bufs[9]); |
159 | 0 | out1[0x10* 3] = REAL_SCALE_DCT64(bufs[16+9] + bufs[16+5]); |
160 | 0 | out1[0x10* 4] = REAL_SCALE_DCT64(bufs[5]); |
161 | 0 | out1[0x10* 5] = REAL_SCALE_DCT64(bufs[16+5] + bufs[16+13]); |
162 | 0 | out1[0x10* 6] = REAL_SCALE_DCT64(bufs[13]); |
163 | 0 | out1[0x10* 7] = REAL_SCALE_DCT64(bufs[16+13] + bufs[16+3]); |
164 | 0 | out1[0x10* 8] = REAL_SCALE_DCT64(bufs[3]); |
165 | 0 | out1[0x10* 9] = REAL_SCALE_DCT64(bufs[16+3] + bufs[16+11]); |
166 | 0 | out1[0x10*10] = REAL_SCALE_DCT64(bufs[11]); |
167 | 0 | out1[0x10*11] = REAL_SCALE_DCT64(bufs[16+11] + bufs[16+7]); |
168 | 0 | out1[0x10*12] = REAL_SCALE_DCT64(bufs[7]); |
169 | 0 | out1[0x10*13] = REAL_SCALE_DCT64(bufs[16+7] + bufs[16+15]); |
170 | 0 | out1[0x10*14] = REAL_SCALE_DCT64(bufs[15]); |
171 | 0 | out1[0x10*15] = REAL_SCALE_DCT64(bufs[16+15]); /* last odd slot has no second neighbour: single element only */ |
172 | |
|
173 | 0 | } |
174 | | |
175 | | |