Coverage Report

Created: 2025-07-18 06:54

/src/tremor/mdct.c
Line
Count
Source (jump to first uncovered line)
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
4
 *                                                                  *
5
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
6
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
8
 *                                                                  *
9
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
10
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
11
 *                                                                  *
12
 ********************************************************************
13
14
 function: normalized modified discrete cosine transform
15
           power of two length transform only [64 <= n ]
16
 last mod: $Id$
17
18
 Original algorithm adapted long ago from _The use of multirate filter
19
 banks for coding of high quality digital audio_, by T. Sporer,
20
 K. Brandenburg and B. Edler, collection of the European Signal
21
 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22
 211-214
23
24
 The below code implements an algorithm that no longer looks much like
25
 that presented in the paper, but the basic structure remains if you
26
 dig deep enough to see it.
27
28
 This module DOES NOT INCLUDE code to generate/apply the window
29
 function.  Everybody has their own weird favorite including me... I
30
 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31
 vehemently disagree.
32
33
 ********************************************************************/
34
35
#include "ivorbiscodec.h"
36
#include "codebook.h"
37
#include "misc.h"
38
#include "mdct.h"
39
#include "mdct_lookup.h"
40
41
42
/* 8 point butterfly (in place) */
43
395M
STIN void mdct_butterfly_8(DATA_TYPE *x){
44
45
395M
  REG_TYPE r0   = x[4] + x[0];
46
395M
  REG_TYPE r1   = x[4] - x[0];
47
395M
  REG_TYPE r2   = x[5] + x[1];
48
395M
  REG_TYPE r3   = x[5] - x[1];
49
395M
  REG_TYPE r4   = x[6] + x[2];
50
395M
  REG_TYPE r5   = x[6] - x[2];
51
395M
  REG_TYPE r6   = x[7] + x[3];
52
395M
  REG_TYPE r7   = x[7] - x[3];
53
54
395M
     x[0] = r5   + r3;
55
395M
     x[1] = r7   - r1;
56
395M
     x[2] = r5   - r3;
57
395M
     x[3] = r7   + r1;
58
395M
           x[4] = r4   - r0;
59
395M
     x[5] = r6   - r2;
60
395M
           x[6] = r4   + r0;
61
395M
     x[7] = r6   + r2;
62
395M
     MB();
63
395M
}
64
65
/* 16 point butterfly (in place, 4 register) */
66
197M
STIN void mdct_butterfly_16(DATA_TYPE *x){
67
68
197M
  REG_TYPE r0, r1;
69
70
197M
     r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
71
197M
     r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
72
197M
     x[ 0] = MULT31((r0 + r1) , cPI2_8);
73
197M
     x[ 1] = MULT31((r1 - r0) , cPI2_8);
74
197M
     MB();
75
76
197M
     r0 = x[10] - x[ 2]; x[10] += x[ 2];
77
197M
     r1 = x[ 3] - x[11]; x[11] += x[ 3];
78
197M
     x[ 2] = r1; x[ 3] = r0;
79
197M
     MB();
80
81
197M
     r0 = x[12] - x[ 4]; x[12] += x[ 4];
82
197M
     r1 = x[13] - x[ 5]; x[13] += x[ 5];
83
197M
     x[ 4] = MULT31((r0 - r1) , cPI2_8);
84
197M
     x[ 5] = MULT31((r0 + r1) , cPI2_8);
85
197M
     MB();
86
87
197M
     r0 = x[14] - x[ 6]; x[14] += x[ 6];
88
197M
     r1 = x[15] - x[ 7]; x[15] += x[ 7];
89
197M
     x[ 6] = r0; x[ 7] = r1;
90
197M
     MB();
91
92
197M
     mdct_butterfly_8(x);
93
197M
     mdct_butterfly_8(x+8);
94
197M
}
95
96
/* 32 point butterfly (in place, 4 register) */
97
98.7M
STIN void mdct_butterfly_32(DATA_TYPE *x){
98
99
98.7M
  REG_TYPE r0, r1;
100
101
98.7M
     r0 = x[30] - x[14]; x[30] += x[14];           
102
98.7M
     r1 = x[31] - x[15]; x[31] += x[15];
103
98.7M
     x[14] = r0; x[15] = r1;
104
98.7M
     MB();
105
106
98.7M
     r0 = x[28] - x[12]; x[28] += x[12];           
107
98.7M
     r1 = x[29] - x[13]; x[29] += x[13];
108
98.7M
     XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
109
98.7M
     MB();
110
111
98.7M
     r0 = x[26] - x[10]; x[26] += x[10];
112
98.7M
     r1 = x[27] - x[11]; x[27] += x[11];
113
98.7M
     x[10] = MULT31((r0 - r1) , cPI2_8);
114
98.7M
     x[11] = MULT31((r0 + r1) , cPI2_8);
115
98.7M
     MB();
116
117
98.7M
     r0 = x[24] - x[ 8]; x[24] += x[ 8];
118
98.7M
     r1 = x[25] - x[ 9]; x[25] += x[ 9];
119
98.7M
     XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
120
98.7M
     MB();
121
122
98.7M
     r0 = x[22] - x[ 6]; x[22] += x[ 6];
123
98.7M
     r1 = x[ 7] - x[23]; x[23] += x[ 7];
124
98.7M
     x[ 6] = r1; x[ 7] = r0;
125
98.7M
     MB();
126
127
98.7M
     r0 = x[ 4] - x[20]; x[20] += x[ 4];
128
98.7M
     r1 = x[ 5] - x[21]; x[21] += x[ 5];
129
98.7M
     XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
130
98.7M
     MB();
131
132
98.7M
     r0 = x[ 2] - x[18]; x[18] += x[ 2];
133
98.7M
     r1 = x[ 3] - x[19]; x[19] += x[ 3];
134
98.7M
     x[ 2] = MULT31((r1 + r0) , cPI2_8);
135
98.7M
     x[ 3] = MULT31((r1 - r0) , cPI2_8);
136
98.7M
     MB();
137
138
98.7M
     r0 = x[ 0] - x[16]; x[16] += x[ 0];
139
98.7M
     r1 = x[ 1] - x[17]; x[17] += x[ 1];
140
98.7M
     XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
141
98.7M
     MB();
142
143
98.7M
     mdct_butterfly_16(x);
144
98.7M
     mdct_butterfly_16(x+16);
145
98.7M
}
146
147
/* N/stage point generic N stage butterfly (in place, 2 register) */
148
95.8M
STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
149
150
95.8M
  LOOKUP_T *T   = sincos_lookup0;
151
95.8M
  DATA_TYPE *x1        = x + points      - 8;
152
95.8M
  DATA_TYPE *x2        = x + (points>>1) - 8;
153
95.8M
  REG_TYPE   r0;
154
95.8M
  REG_TYPE   r1;
155
156
312M
  do{
157
312M
    r0 = x1[6] - x2[6]; x1[6] += x2[6];
158
312M
    r1 = x2[7] - x1[7]; x1[7] += x2[7];
159
312M
    XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
160
161
312M
    r0 = x1[4] - x2[4]; x1[4] += x2[4];
162
312M
    r1 = x2[5] - x1[5]; x1[5] += x2[5];
163
312M
    XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
164
165
312M
    r0 = x1[2] - x2[2]; x1[2] += x2[2];
166
312M
    r1 = x2[3] - x1[3]; x1[3] += x2[3];
167
312M
    XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
168
169
312M
    r0 = x1[0] - x2[0]; x1[0] += x2[0];
170
312M
    r1 = x2[1] - x1[1]; x1[1] += x2[1];
171
312M
    XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
172
173
312M
    x1-=8; x2-=8;
174
312M
  }while(T<sincos_lookup0+1024);
175
312M
  do{
176
312M
    r0 = x1[6] - x2[6]; x1[6] += x2[6];
177
312M
    r1 = x1[7] - x2[7]; x1[7] += x2[7];
178
312M
    XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
179
180
312M
    r0 = x1[4] - x2[4]; x1[4] += x2[4];
181
312M
    r1 = x1[5] - x2[5]; x1[5] += x2[5];
182
312M
    XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
183
184
312M
    r0 = x1[2] - x2[2]; x1[2] += x2[2];
185
312M
    r1 = x1[3] - x2[3]; x1[3] += x2[3];
186
312M
    XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
187
188
312M
    r0 = x1[0] - x2[0]; x1[0] += x2[0];
189
312M
    r1 = x1[1] - x2[1]; x1[1] += x2[1];
190
312M
    XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
191
192
312M
    x1-=8; x2-=8;
193
312M
  }while(T>sincos_lookup0);
194
312M
  do{
195
312M
    r0 = x2[6] - x1[6]; x1[6] += x2[6];
196
312M
    r1 = x2[7] - x1[7]; x1[7] += x2[7];
197
312M
    XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
198
199
312M
    r0 = x2[4] - x1[4]; x1[4] += x2[4];
200
312M
    r1 = x2[5] - x1[5]; x1[5] += x2[5];
201
312M
    XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
202
203
312M
    r0 = x2[2] - x1[2]; x1[2] += x2[2];
204
312M
    r1 = x2[3] - x1[3]; x1[3] += x2[3];
205
312M
    XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
206
207
312M
    r0 = x2[0] - x1[0]; x1[0] += x2[0];
208
312M
    r1 = x2[1] - x1[1]; x1[1] += x2[1];
209
312M
    XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
210
211
312M
    x1-=8; x2-=8;
212
312M
  }while(T<sincos_lookup0+1024);
213
312M
  do{
214
312M
    r0 = x1[6] - x2[6]; x1[6] += x2[6];
215
312M
    r1 = x2[7] - x1[7]; x1[7] += x2[7];
216
312M
    XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
217
218
312M
    r0 = x1[4] - x2[4]; x1[4] += x2[4];
219
312M
    r1 = x2[5] - x1[5]; x1[5] += x2[5];
220
312M
    XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
221
222
312M
    r0 = x1[2] - x2[2]; x1[2] += x2[2];
223
312M
    r1 = x2[3] - x1[3]; x1[3] += x2[3];
224
312M
    XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
225
226
312M
    r0 = x1[0] - x2[0]; x1[0] += x2[0];
227
312M
    r1 = x2[1] - x1[1]; x1[1] += x2[1];
228
312M
    XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
229
230
312M
    x1-=8; x2-=8;
231
312M
  }while(T>sincos_lookup0);
232
95.8M
}
233
234
2.96M
STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
235
236
2.96M
  int stages=8-shift;
237
2.96M
  int i,j;
238
  
239
11.8M
  for(i=0;--stages>0;i++){
240
104M
    for(j=0;j<(1<<i);j++)
241
95.8M
      mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
242
8.88M
  }
243
244
101M
  for(j=0;j<points;j+=32)
245
98.7M
    mdct_butterfly_32(x+j);
246
247
2.96M
}
248
249
static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
250
251
790M
STIN int bitrev12(int x){
252
790M
  return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
253
790M
}
254
255
2.96M
STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
256
257
2.96M
  int          bit   = 0;
258
2.96M
  DATA_TYPE   *w0    = x;
259
2.96M
  DATA_TYPE   *w1    = x = w0+(n>>1);
260
2.96M
  LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
261
2.96M
  LOOKUP_T    *Ttop  = T+1024;
262
2.96M
  DATA_TYPE    r2;
263
264
197M
  do{
265
197M
    DATA_TYPE r3     = bitrev12(bit++);
266
197M
    DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
267
197M
    DATA_TYPE *x1    = x + (r3>>shift);
268
269
197M
    REG_TYPE  r0     = x0[0]  + x1[0];
270
197M
    REG_TYPE  r1     = x1[1]  - x0[1];
271
272
197M
        XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
273
274
197M
        w1    -= 4;
275
276
197M
        r0     = (x0[1] + x1[1])>>1;
277
197M
              r1     = (x0[0] - x1[0])>>1;
278
197M
        w0[0]  = r0     + r2;
279
197M
        w0[1]  = r1     + r3;
280
197M
        w1[2]  = r0     - r2;
281
197M
        w1[3]  = r3     - r1;
282
283
197M
        r3     = bitrev12(bit++);
284
197M
              x0     = x + ((r3 ^ 0xfff)>>shift) -1;
285
197M
              x1     = x + (r3>>shift);
286
287
197M
              r0     = x0[0]  + x1[0];
288
197M
              r1     = x1[1]  - x0[1];
289
290
197M
        XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
291
292
197M
              r0     = (x0[1] + x1[1])>>1;
293
197M
              r1     = (x0[0] - x1[0])>>1;
294
197M
        w0[2]  = r0     + r2;
295
197M
        w0[3]  = r1     + r3;
296
197M
        w1[0]  = r0     - r2;
297
197M
        w1[1]  = r3     - r1;
298
299
197M
        w0    += 4;
300
197M
  }while(T<Ttop);
301
197M
  do{
302
197M
    DATA_TYPE r3     = bitrev12(bit++);
303
197M
    DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
304
197M
    DATA_TYPE *x1    = x + (r3>>shift);
305
306
197M
    REG_TYPE  r0     = x0[0]  + x1[0];
307
197M
    REG_TYPE  r1     = x1[1]  - x0[1];
308
309
197M
        T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
310
311
197M
        w1    -= 4;
312
313
197M
        r0     = (x0[1] + x1[1])>>1;
314
197M
              r1     = (x0[0] - x1[0])>>1;
315
197M
        w0[0]  = r0     + r2;
316
197M
        w0[1]  = r1     + r3;
317
197M
        w1[2]  = r0     - r2;
318
197M
        w1[3]  = r3     - r1;
319
320
197M
        r3     = bitrev12(bit++);
321
197M
              x0     = x + ((r3 ^ 0xfff)>>shift) -1;
322
197M
              x1     = x + (r3>>shift);
323
324
197M
              r0     = x0[0]  + x1[0];
325
197M
              r1     = x1[1]  - x0[1];
326
327
197M
        T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
328
329
197M
              r0     = (x0[1] + x1[1])>>1;
330
197M
              r1     = (x0[0] - x1[0])>>1;
331
197M
        w0[2]  = r0     + r2;
332
197M
        w0[3]  = r1     + r3;
333
197M
        w1[0]  = r0     - r2;
334
197M
        w1[1]  = r3     - r1;
335
336
197M
        w0    += 4;
337
197M
  }while(w0<w1);
338
2.96M
}
339
340
2.96M
void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
341
2.96M
  int n2=n>>1;
342
2.96M
  int n4=n>>2;
343
2.96M
  DATA_TYPE *iX;
344
2.96M
  DATA_TYPE *oX;
345
2.96M
  LOOKUP_T *T;
346
2.96M
  LOOKUP_T *V;
347
2.96M
  int shift;
348
2.96M
  int step;
349
350
11.8M
  for (shift=6;!(n&(1<<shift));shift++);
351
2.96M
  shift=13-shift;
352
2.96M
  step=2<<shift;
353
   
354
  /* rotate */
355
356
2.96M
  iX            = in+n2-7;
357
2.96M
  oX            = out+n2+n4;
358
2.96M
  T             = sincos_lookup0;
359
360
197M
  do{
361
197M
    oX-=4;
362
197M
    XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
363
197M
    XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
364
197M
    iX-=8;
365
197M
  }while(iX>=in+n4);
366
197M
  do{
367
197M
    oX-=4;
368
197M
    XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
369
197M
    XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
370
197M
    iX-=8;
371
197M
  }while(iX>=in);
372
373
2.96M
  iX            = in+n2-8;
374
2.96M
  oX            = out+n2+n4;
375
2.96M
  T             = sincos_lookup0;
376
377
197M
  do{
378
197M
    T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
379
197M
    T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
380
197M
    iX-=8;
381
197M
    oX+=4;
382
197M
  }while(iX>=in+n4);
383
197M
  do{
384
197M
    T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
385
197M
    T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
386
197M
    iX-=8;
387
197M
    oX+=4;
388
197M
  }while(iX>=in);
389
390
2.96M
  mdct_butterflies(out+n2,n2,shift);
391
2.96M
  mdct_bitreverse(out,n,step,shift);
392
393
  /* rotate + window */
394
395
2.96M
  step>>=2;
396
2.96M
  {
397
2.96M
    DATA_TYPE *oX1=out+n2+n4;
398
2.96M
    DATA_TYPE *oX2=out+n2+n4;
399
2.96M
    DATA_TYPE *iX =out;
400
401
2.96M
    switch(step) {
402
2.20M
      default: {
403
2.20M
        T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
404
65.0M
        do{
405
65.0M
          oX1-=4;
406
65.0M
    XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
407
65.0M
    XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
408
65.0M
    XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
409
65.0M
    XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
410
65.0M
    oX2+=4;
411
65.0M
    iX+=8;
412
65.0M
  }while(iX<oX1);
413
2.20M
  break;
414
0
      }
415
416
226k
      case 1: {
417
        /* linear interpolation between table values: offset=0.5, step=1 */
418
226k
  REG_TYPE  t0,t1,v0,v1;
419
226k
        T         = sincos_lookup0;
420
226k
        V         = sincos_lookup1;
421
226k
  t0        = (*T++)>>1;
422
226k
  t1        = (*T++)>>1;
423
57.9M
        do{
424
57.9M
          oX1-=4;
425
426
57.9M
    t0 += (v0 = (*V++)>>1);
427
57.9M
    t1 += (v1 = (*V++)>>1);
428
57.9M
    XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
429
57.9M
    v0 += (t0 = (*T++)>>1);
430
57.9M
    v1 += (t1 = (*T++)>>1);
431
57.9M
    XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
432
57.9M
    t0 += (v0 = (*V++)>>1);
433
57.9M
    t1 += (v1 = (*V++)>>1);
434
57.9M
    XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
435
57.9M
    v0 += (t0 = (*T++)>>1);
436
57.9M
    v1 += (t1 = (*T++)>>1);
437
57.9M
    XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
438
439
57.9M
    oX2+=4;
440
57.9M
    iX+=8;
441
57.9M
  }while(iX<oX1);
442
226k
  break;
443
0
      }
444
445
531k
      case 0: {
446
        /* linear interpolation between table values: offset=0.25, step=0.5 */
447
531k
  REG_TYPE  t0,t1,v0,v1,q0,q1;
448
531k
        T         = sincos_lookup0;
449
531k
        V         = sincos_lookup1;
450
531k
  t0        = *T++;
451
531k
  t1        = *T++;
452
272M
        do{
453
272M
          oX1-=4;
454
455
272M
    v0  = *V++;
456
272M
    v1  = *V++;
457
272M
    t0 +=  (q0 = (v0-t0)>>2);
458
272M
    t1 +=  (q1 = (v1-t1)>>2);
459
272M
    XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
460
272M
    t0  = v0-q0;
461
272M
    t1  = v1-q1;
462
272M
    XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
463
464
272M
    t0  = *T++;
465
272M
    t1  = *T++;
466
272M
    v0 += (q0 = (t0-v0)>>2);
467
272M
    v1 += (q1 = (t1-v1)>>2);
468
272M
    XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
469
272M
    v0  = t0-q0;
470
272M
    v1  = t1-q1;
471
272M
    XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
472
473
272M
    oX2+=4;
474
272M
    iX+=8;
475
272M
  }while(iX<oX1);
476
531k
  break;
477
0
      }
478
2.96M
    }
479
480
2.96M
    iX=out+n2+n4;
481
2.96M
    oX1=out+n4;
482
2.96M
    oX2=oX1;
483
484
395M
    do{
485
395M
      oX1-=4;
486
395M
      iX-=4;
487
488
395M
      oX2[0] = -(oX1[3] = iX[3]);
489
395M
      oX2[1] = -(oX1[2] = iX[2]);
490
395M
      oX2[2] = -(oX1[1] = iX[1]);
491
395M
      oX2[3] = -(oX1[0] = iX[0]);
492
493
395M
      oX2+=4;
494
395M
    }while(oX2<iX);
495
496
2.96M
    iX=out+n2+n4;
497
2.96M
    oX1=out+n2+n4;
498
2.96M
    oX2=out+n2;
499
500
395M
    do{
501
395M
      oX1-=4;
502
395M
      oX1[0]= iX[3];
503
395M
      oX1[1]= iX[2];
504
395M
      oX1[2]= iX[1];
505
395M
      oX1[3]= iX[0];
506
395M
      iX+=4;
507
395M
    }while(oX1>oX2);
508
2.96M
  }
509
2.96M
}
510