Coverage Report

Created: 2024-11-21 06:52

/src/mpg123/src/libmpg123/optimize.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
  optimize: get a grip on the different optimizations
3
4
  copyright 2006-21 by the mpg123 project - free software under the terms of the LGPL 2.1
5
  see COPYING and AUTHORS files in distribution or http://mpg123.org
6
  initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
8
  Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9
*/
10
11
#define I_AM_OPTIMIZE
12
#define WANT_GETCPUFLAGS
13
#include "mpg123lib_intern.h" /* includes optimize.h */
14
#include "getcpuflags.h"
15
#include "../common/debug.h"
16
17
18
/*
  Helper macros that assemble one row of a synth function table.
  Each IFxx(synth) expands to its argument when the matching output format
  is compiled in, and to nothing when that format is disabled via NO_xx.
  IF8 and IFREAL carry a trailing comma, IF32 (the last entry) does not.
*/

#ifdef NO_8BIT
#define IF8(synth)
#else
#define IF8(synth) synth,
#endif

/* NO_SYNTH32 switches off both the real and the 32 bit entries at once. */
#if defined(NO_SYNTH32) || defined(NO_REAL)
#define IFREAL(synth)
#else
#define IFREAL(synth) synth,
#endif

#if defined(NO_SYNTH32) || defined(NO_32BIT)
#define IF32(synth)
#else
#define IF32(synth) synth
#endif

/* One table row: { 16 bit, 8 bit, real, 32 bit }, minus the disabled formats. */
#ifdef NO_16BIT
# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
52
53
/* The call of left and right plain synth, wrapped.
54
   This may be replaced by a direct stereo optimized synth. */
55
static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
56
0
{
57
0
  int clip;
58
0
  clip  = (fr->synth)(bandPtr_l, 0, fr, 0);
59
0
  clip += (fr->synth)(bandPtr_r, 1, fr, 1);
60
0
  return clip;
61
0
}
62
63
static const struct synth_s synth_base =
64
{
65
  { /* plain */
66
     OUT_SYNTHS(INT123_synth_1to1, INT123_synth_1to1_8bit, INT123_synth_1to1_real, INT123_synth_1to1_s32)
67
#   ifndef NO_DOWNSAMPLE
68
    ,OUT_SYNTHS(INT123_synth_2to1, INT123_synth_2to1_8bit, INT123_synth_2to1_real, INT123_synth_2to1_s32)
69
    ,OUT_SYNTHS(INT123_synth_4to1, INT123_synth_4to1_8bit, INT123_synth_4to1_real, INT123_synth_4to1_s32)
70
#   endif
71
#   ifndef NO_NTOM
72
    ,OUT_SYNTHS(INT123_synth_ntom, INT123_synth_ntom_8bit, INT123_synth_ntom_real, INT123_synth_ntom_s32)
73
#   endif
74
  },
75
  { /* stereo, by default only wrappers over plain synth */
76
     OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
77
#   ifndef NO_DOWNSAMPLE
78
    ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
79
    ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
80
#   endif
81
#   ifndef NO_NTOM
82
    ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
83
#   endif
84
  },
85
  { /* mono2stereo */
86
     OUT_SYNTHS(INT123_synth_1to1_m2s, INT123_synth_1to1_8bit_m2s, INT123_synth_1to1_real_m2s, INT123_synth_1to1_s32_m2s)
87
#   ifndef NO_DOWNSAMPLE
88
    ,OUT_SYNTHS(INT123_synth_2to1_m2s, INT123_synth_2to1_8bit_m2s, INT123_synth_2to1_real_m2s, INT123_synth_2to1_s32_m2s)
89
    ,OUT_SYNTHS(INT123_synth_4to1_m2s, INT123_synth_4to1_8bit_m2s, INT123_synth_4to1_real_m2s, INT123_synth_4to1_s32_m2s)
90
#   endif
91
#   ifndef NO_NTOM
92
    ,OUT_SYNTHS(INT123_synth_ntom_m2s, INT123_synth_ntom_8bit_m2s, INT123_synth_ntom_real_m2s, INT123_synth_ntom_s32_m2s)
93
#   endif
94
  },
95
  { /* mono*/
96
     OUT_SYNTHS(INT123_synth_1to1_mono, INT123_synth_1to1_8bit_mono, INT123_synth_1to1_real_mono, INT123_synth_1to1_s32_mono)
97
#   ifndef NO_DOWNSAMPLE
98
    ,OUT_SYNTHS(INT123_synth_2to1_mono, INT123_synth_2to1_8bit_mono, INT123_synth_2to1_real_mono, INT123_synth_2to1_s32_mono)
99
    ,OUT_SYNTHS(INT123_synth_4to1_mono, INT123_synth_4to1_8bit_mono, INT123_synth_4to1_real_mono, INT123_synth_4to1_s32_mono)
100
#   endif
101
#   ifndef NO_NTOM
102
    ,OUT_SYNTHS(INT123_synth_ntom_mono, INT123_synth_ntom_8bit_mono, INT123_synth_ntom_real_mono, INT123_synth_ntom_s32_mono)
103
#endif
104
  }
105
};
106
107
#ifdef OPT_X86
/* Plain synth table for i386 builds. The 1to1, 2to1 and 4to1 rows use the
   _i386 variants; the ntom row reuses the generic ntom synths. */
const func_synth plain_i386[r_limit][f_limit] =
{
   OUT_SYNTHS(INT123_synth_1to1_i386, INT123_synth_1to1_8bit_i386, INT123_synth_1to1_real_i386, INT123_synth_1to1_s32_i386)
# ifndef NO_DOWNSAMPLE
  ,OUT_SYNTHS(INT123_synth_2to1_i386, INT123_synth_2to1_8bit_i386, INT123_synth_2to1_real_i386, INT123_synth_2to1_s32_i386)
  ,OUT_SYNTHS(INT123_synth_4to1_i386, INT123_synth_4to1_8bit_i386, INT123_synth_4to1_real_i386, INT123_synth_4to1_s32_i386)
# endif
# ifndef NO_NTOM
  ,OUT_SYNTHS(INT123_synth_ntom, INT123_synth_ntom_8bit, INT123_synth_ntom_real, INT123_synth_ntom_s32)
# endif
};
#endif
121
122
123
0
/* Hand out the build's default decoder type (defopt). */
enum optdec INT123_defdec(void)
{
  return defopt;
}
124
125
enum optcla INT123_decclass(const enum optdec type)
126
0
{
127
0
  return
128
0
  (
129
0
       type == mmx
130
0
    || type == sse
131
0
    || type == sse_vintage
132
0
    || type == dreidnowext
133
0
    || type == dreidnowext_vintage
134
0
    || type == x86_64
135
0
    || type == neon
136
0
    || type == neon64
137
0
    || type == avx
138
0
  ) ? mmxsse : normal;
139
0
}
140
141
/* Tell whether the given synth function occurs anywhere in the given
   r_limit x f_limit table. Returns TRUE on the first match, FALSE if the
   whole table has been scanned without one. */
static int find_synth(func_synth synth,  const func_synth synths[r_limit][f_limit])
{
  int row;
  int col;

  for(row = 0; row < r_limit; ++row)
  {
    for(col = 0; col < f_limit; ++col)
    {
      if(synths[row][col] == synth)
      {
        return TRUE;
      }
    }
  }

  return FALSE;
}
152
153
154
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
/* Called once it is known that the synth is one of the two SSE flavours;
   decides between sse and sse_vintage.
   Without OPT_SSE the answer is always sse_vintage. With OPT_SSE it is sse,
   unless OPT_THE_DCT36 is defined and INT123_dct36_match(fr, sse) fails,
   which yields sse_vintage. */
static enum optdec sse_or_vintage(mpg123_handle *fr)
{
# ifdef OPT_SSE
#  ifdef OPT_THE_DCT36
  if(!INT123_dct36_match(fr, sse))
  {
    return sse_vintage;
  }
#  endif
  return sse;
# else
  return sse_vintage;
# endif
}
#endif
171
172
/* Determine what kind of decoder is actually active
173
   This depends on runtime choices which may cause fallback to i386 or generic code. */
174
static int find_dectype(mpg123_handle *fr)
175
0
{
176
0
  enum optdec type = nodec;
177
  /* Direct and indirect usage, 1to1 stereo decoding.
178
     Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
179
0
  func_synth basic_synth = fr->synth;
180
0
#ifndef NO_8BIT
181
0
#ifndef NO_16BIT
182
0
  if(basic_synth == INT123_synth_1to1_8bit_wrap)
183
0
  basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
184
0
#endif
185
0
#endif
186
187
0
  if(FALSE) ; /* Just to initialize the else if ladder. */
188
0
#ifndef NO_16BIT
189
#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)
190
  else if(basic_synth == INT123_synth_1to1_3dnowext)
191
  {
192
    type = dreidnowext;
193
#   ifdef OPT_3DNOWEXT_VINTAGE
194
#   ifdef OPT_MULTI
195
    if(INT123_dct36_match(fr, dreidnowext_vintage))
196
#   endif
197
    type = dreidnowext_vintage;
198
#   endif
199
  }
200
#endif
201
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
202
  else if(basic_synth == INT123_synth_1to1_sse)
203
  {
204
    type = sse_or_vintage(fr);
205
  }
206
#endif
207
#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)
208
  else if(basic_synth == INT123_synth_1to1_3dnow)
209
  {
210
    type = dreidnow;
211
#   ifdef OPT_3DNOW_VINTAGE
212
#   ifdef OPT_MULTI
213
    if(INT123_dct36_match(fr, dreidnow_vintage))
214
#   endif
215
    type = dreidnow_vintage;
216
#   endif
217
  }
218
#endif
219
#ifdef OPT_MMX
220
  else if(basic_synth == INT123_synth_1to1_mmx) type = mmx;
221
#endif
222
#ifdef OPT_I586_DITHER
223
  else if(basic_synth == INT123_synth_1to1_i586_dither) type = ifuenf_dither;
224
#endif
225
#ifdef OPT_I586
226
  else if(basic_synth == INT123_synth_1to1_i586) type = ifuenf;
227
#endif
228
#ifdef OPT_ALTIVEC
229
  else if(basic_synth == INT123_synth_1to1_altivec) type = altivec;
230
#endif
231
0
#ifdef OPT_X86_64
232
0
  else if(basic_synth == INT123_synth_1to1_x86_64) type = x86_64;
233
0
#endif
234
0
#ifdef OPT_AVX
235
0
  else if(basic_synth == INT123_synth_1to1_avx) type = avx;
236
0
#endif
237
#ifdef OPT_ARM
238
  else if(basic_synth == INT123_synth_1to1_arm) type = arm;
239
#endif
240
#ifdef OPT_NEON
241
  else if(basic_synth == INT123_synth_1to1_neon) type = neon;
242
#endif
243
#ifdef OPT_NEON64
244
  else if(basic_synth == INT123_synth_1to1_neon64) type = neon64;
245
#endif
246
0
#ifdef OPT_GENERIC_DITHER
247
0
  else if(basic_synth == INT123_synth_1to1_dither) type = generic_dither;
248
0
#endif
249
0
#ifdef OPT_DITHER /* either i586 or generic! */
250
0
#ifndef NO_DOWNSAMPLE
251
0
  else if
252
0
  (
253
0
       basic_synth == INT123_synth_2to1_dither
254
0
    || basic_synth == INT123_synth_4to1_dither
255
0
  ) type = generic_dither;
256
0
#endif
257
0
#endif
258
0
#endif /* 16bit */
259
260
0
#ifndef NO_SYNTH32
261
262
0
#ifndef NO_REAL
263
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
264
  else if(basic_synth == INT123_synth_1to1_real_sse)
265
  {
266
    type = sse_or_vintage(fr);
267
  }
268
#endif
269
0
#ifdef OPT_X86_64
270
0
  else if(basic_synth == INT123_synth_1to1_real_x86_64) type = x86_64;
271
0
#endif
272
0
#ifdef OPT_AVX
273
0
  else if(basic_synth == INT123_synth_1to1_real_avx) type = avx;
274
0
#endif
275
#ifdef OPT_ALTIVEC
276
  else if(basic_synth == INT123_synth_1to1_real_altivec) type = altivec;
277
#endif
278
#ifdef OPT_NEON
279
  else if(basic_synth == INT123_synth_1to1_real_neon) type = neon;
280
#endif
281
#ifdef OPT_NEON64
282
  else if(basic_synth == INT123_synth_1to1_real_neon64) type = neon64;
283
#endif
284
285
0
#endif /* real */
286
287
0
#ifndef NO_32BIT
288
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
289
  else if(basic_synth == INT123_synth_1to1_s32_sse)
290
  {
291
    type = sse_or_vintage(fr);
292
  }
293
#endif
294
0
#ifdef OPT_X86_64
295
0
  else if(basic_synth == INT123_synth_1to1_s32_x86_64) type = x86_64;
296
0
#endif
297
0
#ifdef OPT_AVX
298
0
  else if(basic_synth == INT123_synth_1to1_s32_avx) type = avx;
299
0
#endif
300
#ifdef OPT_ALTIVEC
301
  else if(basic_synth == INT123_synth_1to1_s32_altivec) type = altivec;
302
#endif
303
#ifdef OPT_NEON
304
  else if(basic_synth == INT123_synth_1to1_s32_neon) type = neon;
305
#endif
306
#ifdef OPT_NEON64
307
  else if(basic_synth == INT123_synth_1to1_s32_neon64) type = neon64;
308
#endif
309
0
#endif /* 32bit */
310
311
0
#endif /* any 32 bit synth */
312
313
#ifdef OPT_X86
314
  else if(find_synth(basic_synth, plain_i386))
315
  type = idrei;
316
#endif
317
318
0
  else if(find_synth(basic_synth, synth_base.plain))
319
0
  type = generic;
320
321
322
323
#ifdef OPT_I486
324
  /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
325
     otherwise we have i386 active... but still, the distinction doesn't matter*/
326
  type = ivier;
327
#endif
328
329
0
  if(type != nodec)
330
0
  {
331
0
    fr->cpu_opts.type = type;
332
0
    fr->cpu_opts.class = INT123_decclass(type);
333
334
0
    debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
335
0
    return MPG123_OK;
336
0
  }
337
0
  else
338
0
  {
339
0
    if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
340
341
0
    fr->err = MPG123_BAD_DECODER_SETUP;
342
0
    return MPG123_ERR;
343
0
  }
344
0
}
345
346
/* set synth functions for current frame, optimizations handled by opt_* macros */
347
int INT123_set_synth_functions(mpg123_handle *fr)
348
0
{
349
0
  enum synth_resample resample = r_none;
350
0
  enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
351
352
  /* Select the basic output format, different from 16bit: 8bit, real. */
353
0
  if(FALSE){}
354
0
#ifndef NO_16BIT
355
0
  else if(fr->af.dec_enc & MPG123_ENC_16)
356
0
  basic_format = f_16;
357
0
#endif
358
0
#ifndef NO_8BIT
359
0
  else if(fr->af.dec_enc & MPG123_ENC_8)
360
0
  basic_format = f_8;
361
0
#endif
362
0
#ifndef NO_REAL
363
0
  else if(fr->af.dec_enc & MPG123_ENC_FLOAT)
364
0
  basic_format = f_real;
365
0
#endif
366
0
#ifndef NO_32BIT
367
  /* 24 bit integer means decoding to 32 bit first. */
368
0
  else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)
369
0
  basic_format = f_32;
370
0
#endif
371
372
  /* Make sure the chosen format is compiled into this lib. */
373
0
  if(basic_format == f_none)
374
0
  {
375
0
    if(NOQUIET) error("INT123_set_synth_functions: This output format is disabled in this build!");
376
377
0
    return -1;
378
0
  }
379
380
  /* Be explicit about downsampling variant. */
381
0
  switch(fr->down_sample)
382
0
  {
383
0
    case 0: resample = r_1to1; break;
384
0
#ifndef NO_DOWNSAMPLE
385
0
    case 1: resample = r_2to1; break;
386
0
    case 2: resample = r_4to1; break;
387
0
#endif
388
0
#ifndef NO_NTOM
389
0
    case 3: resample = r_ntom; break;
390
0
#endif
391
0
  }
392
393
0
  if(resample == r_none)
394
0
  {
395
0
    if(NOQUIET) error("INT123_set_synth_functions: This resampling mode is not supported in this build!");
396
397
0
    return -1;
398
0
  }
399
400
0
  debug2("selecting synth: resample=%i format=%i", resample, basic_format);
401
  /* Finally selecting the synth functions for stereo / mono. */
402
0
  fr->synth = fr->synths.plain[resample][basic_format];
403
0
  fr->synth_stereo = fr->synths.stereo[resample][basic_format];
404
0
  fr->synth_mono = fr->af.channels==2
405
0
    ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
406
0
    : fr->synths.mono[resample][basic_format];       /* Mono MPEG file decoded to mono. */
407
408
0
  if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
409
0
  {
410
0
    fr->err = MPG123_BAD_DECODER_SETUP;
411
0
    return MPG123_ERR;
412
0
  }
413
414
0
  if(INT123_frame_buffers(fr) != 0)
415
0
  {
416
0
    fr->err = MPG123_NO_BUFFERS;
417
0
    if(NOQUIET) error("Failed to set up decoder buffers!");
418
419
0
    return MPG123_ERR;
420
0
  }
421
422
0
#ifndef NO_8BIT
423
0
  if(basic_format == f_8)
424
0
  {
425
0
    if(INT123_make_conv16to8_table(fr) != 0)
426
0
    {
427
0
      if(NOQUIET) error("Failed to set up conv16to8 table!");
428
      /* it's a bit more work to get proper error propagation up */
429
0
      return -1;
430
0
    }
431
0
  }
432
0
#endif
433
434
0
#ifdef OPT_MMXORSSE
435
  /* Special treatment for MMX, SSE and 3DNowExt stuff.
436
     The real-decoding SSE for x86-64 uses normal tables! */
437
0
  if(fr->cpu_opts.class == mmxsse
438
0
# ifndef NO_REAL
439
0
     && basic_format != f_real
440
0
# endif
441
0
# ifndef NO_32BIT
442
0
     && basic_format != f_32
443
0
# endif
444
0
# ifdef ACCURATE_ROUNDING
445
0
     && fr->cpu_opts.type != sse
446
0
     && fr->cpu_opts.type != sse_vintage
447
0
     && fr->cpu_opts.type != x86_64
448
0
     && fr->cpu_opts.type != neon
449
0
     && fr->cpu_opts.type != neon64
450
0
     && fr->cpu_opts.type != avx
451
0
# endif
452
0
    )
453
0
  {
454
0
#ifndef NO_LAYER3
455
0
    INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2_mmx);
456
0
#endif
457
0
#ifndef NO_LAYER12
458
0
    INT123_init_layer12_stuff(fr, INT123_init_layer12_table_mmx);
459
0
#endif
460
0
    fr->INT123_make_decode_tables = INT123_make_decode_tables_mmx;
461
0
  }
462
0
  else
463
0
#endif
464
0
  {
465
0
#ifndef NO_LAYER3
466
0
    INT123_init_layer3_stuff(fr, INT123_init_layer3_gainpow2);
467
0
#endif
468
0
#ifndef NO_LAYER12
469
0
    INT123_init_layer12_stuff(fr, INT123_init_layer12_table);
470
0
#endif
471
0
    fr->INT123_make_decode_tables = INT123_make_decode_tables;
472
0
  }
473
474
  /* We allocated the table buffers just now, so (re)create the tables. */
475
0
  fr->INT123_make_decode_tables(fr);
476
477
0
  return 0;
478
0
}
479
480
/* Choose the decoder optimization for a handle and fill fr->synths.
   cpu names the wanted decoder; INT123_dectype() turns it into a type, with
   NULL or "" meaning automatic choice. fr->synths starts out as a copy of
   the generic table (synth_base), then the first compiled-in candidate that
   is wanted (or any, when choosing automatically) and passes its CPU flag
   checks overrides selected entries. Without OPT_MULTI there is only one
   candidate, which is taken regardless of the request (an error message is
   printed if something else was asked for).
   Returns 1 if a decoder has been chosen, 0 if none could be set or if the
   dither noise setup failed. */
int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
  const char* label = ""; /* name of the chosen decoder, for verbose output */
  enum optdec wanted = INT123_dectype(cpu);
  int decided = 0;
  int take_any = (wanted == autodec);
#ifdef OPT_DITHER
  int needs_dither = FALSE; /* Set when a dithered decoder is chosen. */
#endif

/* A candidate is taken if nothing has been decided yet and it is either
   the explicitly wanted one or the choice is automatic. */
#define CPU_OPT_WANTED(dec) (!decided && (take_any || wanted == (dec)))

  /* Begin with generic code in every slot. */
  fr->synths = synth_base;

#ifndef OPT_MULTI
  if(!take_any && wanted != defopt)
  {
    if(NOQUIET) error2("you wanted decoder type %i, I only have %i", wanted, defopt);
  }
  take_any = TRUE; /* A single choice is all there is in this build. */
#endif

  fr->cpu_opts.type = nodec;

#ifdef OPT_X86
  /* All i386+ CPUs are handled here; they mostly differ in the
     INT123_synth_1to1 function only. */
  if(cpu_i586(fr->cpu_flags))
  {
#   ifdef OPT_MULTI
    debug2("standard flags: 0x%08x\textended flags: 0x%08x", fr->cpu_flags.std, fr->cpu_flags.ext);
#   endif
#   ifdef OPT_SSE
    if(   CPU_OPT_WANTED(sse)
       && cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_sse;
      fr->cpu_opts.type = sse;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;
#     ifdef ACCURATE_ROUNDING
      fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;
#     endif
#     endif
#     ifndef NO_REAL
      fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;
      fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;
#     endif
#     ifndef NO_32BIT
      fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;
      fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_SSE_VINTAGE
    if(   CPU_OPT_WANTED(sse_vintage)
       && cpu_sse(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_sse_vintage;
      fr->cpu_opts.type = sse_vintage;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;
#     ifdef ACCURATE_ROUNDING
      fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_sse;
#     endif
#     endif
#     ifndef NO_REAL
      fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_sse;
      fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_sse;
#     endif
#     ifndef NO_32BIT
      fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_sse;
      fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_sse;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_3DNOWEXT
    if(   CPU_OPT_WANTED(dreidnowext)
       && cpu_3dnow(fr->cpu_flags)
       && cpu_3dnowext(fr->cpu_flags)
       && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_dreidnowext;
      fr->cpu_opts.type = dreidnowext;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_3DNOWEXT_VINTAGE
    if(   CPU_OPT_WANTED(dreidnowext_vintage)
       && cpu_3dnow(fr->cpu_flags)
       && cpu_3dnowext(fr->cpu_flags)
       && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_dreidnowext_vintage;
      fr->cpu_opts.type = dreidnowext_vintage;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_3DNOW
    if(   CPU_OPT_WANTED(dreidnow)
       && cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_dreidnow;
      fr->cpu_opts.type = dreidnow;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_3DNOW_VINTAGE
    if(   CPU_OPT_WANTED(dreidnow_vintage)
       && cpu_3dnow(fr->cpu_flags) && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_dreidnow_vintage;
      fr->cpu_opts.type = dreidnow_vintage;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_MMX
    if(   CPU_OPT_WANTED(mmx)
       && cpu_mmx(fr->cpu_flags) )
    {
      label = dn_mmx;
      fr->cpu_opts.type = mmx;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_mmx;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_I586
    if(CPU_OPT_WANTED(ifuenf))
    {
      label = "i586/pentium";
      fr->cpu_opts.type = ifuenf;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586;
#     endif
      decided = 1;
    }
#   endif
#   ifdef OPT_I586_DITHER
    if(CPU_OPT_WANTED(ifuenf_dither))
    {
      label = "dithered i586/pentium";
      fr->cpu_opts.type = ifuenf_dither;
      needs_dither = TRUE;
#     ifndef NO_16BIT
      fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_i586_dither;
#     ifndef NO_DOWNSAMPLE
      fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;
      fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;
#     endif
#     endif
      decided = 1;
    }
#   endif
  }
# ifdef OPT_I486
  /* That won't cooperate in multi opt mode - forcing i486 in layer3.c
     But still... here it is... maybe for real use in future. */
  if(CPU_OPT_WANTED(ivier))
  {
    label = dn_ivier;
    fr->cpu_opts.type = ivier;
    decided = 1;
  }
# endif
# ifdef OPT_I386
  if(CPU_OPT_WANTED(idrei))
  {
    label = dn_idrei;
    fr->cpu_opts.type = idrei;
    decided = 1;
  }
# endif

  if(decided)
  {
    /*
      Some x86 decoder has been chosen: complete the table with i386 code.
      Whether the dithered INT123_synth_1to1 should be used for the 8bit
      wrappers is an open question. It makes no sense for quality, but a
      wrapped i586_dither may still be faster...
    */
    enum synth_resample rs;
    enum synth_format   fm;
#   if !defined(NO_8BIT) && !defined(NO_16BIT) /* possibility to use a 16->8 wrapper... */
    if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
    {
      fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;
      fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;
      fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;
    }
#   endif
    /* Every plain entry that is still generic gets the i386 variant. */
    for(rs=0; rs<r_limit; ++rs)
    {
      for(fm=0; fm<f_limit; ++fm)
      {
        if(fr->synths.plain[rs][fm] == synth_base.plain[rs][fm])
        {
          fr->synths.plain[rs][fm] = plain_i386[rs][fm];
        }
      }
    }
  }

#endif /* OPT_X86 */

#ifdef OPT_AVX
  if(CPU_OPT_WANTED(avx) && cpu_avx(fr->cpu_flags))
  {
    label = "x86-64 (AVX)";
    fr->cpu_opts.type = avx;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_avx;
    fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_avx;
#   endif
#   ifndef NO_REAL
    fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_avx;
    fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_avx;
#   endif
#   ifndef NO_32BIT
    fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_avx;
    fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_avx;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_X86_64
  if(CPU_OPT_WANTED(x86_64))
  {
    label = "x86-64 (SSE)";
    fr->cpu_opts.type = x86_64;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_x86_64;
    fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_x86_64;
#   endif
#   ifndef NO_REAL
    fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_x86_64;
    fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_x86_64;
#   endif
#   ifndef NO_32BIT
    fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_x86_64;
    fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_x86_64;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_ALTIVEC
  if(CPU_OPT_WANTED(altivec))
  {
    label = dn_altivec;
    fr->cpu_opts.type = altivec;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_altivec;
    fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_altivec;
#   endif
#   ifndef NO_REAL
    fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_altivec;
    fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_altivec;
#   endif
#   ifndef NO_32BIT
    fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_altivec;
    fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_altivec;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_NEON
  if(CPU_OPT_WANTED(neon) && cpu_neon(fr->cpu_flags))
  {
    label = dn_neon;
    fr->cpu_opts.type = neon;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon;
    fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon;
#   endif
#   ifndef NO_REAL
    fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon;
    fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_real_stereo_neon;
#   endif
#   ifndef NO_32BIT
    fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon;
    fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32_stereo_neon;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_ARM
  if(CPU_OPT_WANTED(arm))
  {
    label = dn_arm;
    fr->cpu_opts.type = arm;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_arm;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_NEON64
  if(CPU_OPT_WANTED(neon64) && cpu_neon(fr->cpu_flags))
  {
    label = dn_neon64;
    fr->cpu_opts.type = neon64;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon64;
    fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon64;
#   endif
#   ifndef NO_REAL
    fr->synths.plain[r_1to1][f_real] = INT123_synth_1to1_real_neon64;
    fr->synths.stereo[r_1to1][f_real] = INT123_synth_1to1_fltst_neon64;
#   endif
#   ifndef NO_32BIT
    fr->synths.plain[r_1to1][f_32] = INT123_synth_1to1_s32_neon64;
    fr->synths.stereo[r_1to1][f_32] = INT123_synth_1to1_s32st_neon64;
#   endif
    decided = 1;
  }
#endif

#ifdef OPT_GENERIC
  if(CPU_OPT_WANTED(generic))
  {
    label = dn_generic;
    fr->cpu_opts.type = generic;
    decided = 1;
  }
#endif

#ifdef OPT_GENERIC_DITHER
  if(CPU_OPT_WANTED(generic_dither))
  {
    label = "dithered generic";
    fr->cpu_opts.type = generic_dither;
    needs_dither = TRUE;
#   ifndef NO_16BIT
    fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_dither;
#   ifndef NO_DOWNSAMPLE
    fr->synths.plain[r_2to1][f_16] = INT123_synth_2to1_dither;
    fr->synths.plain[r_4to1][f_16] = INT123_synth_4to1_dither;
#   endif
#   endif
    decided = 1;
  }
#endif

#undef CPU_OPT_WANTED

  fr->cpu_opts.class = INT123_decclass(fr->cpu_opts.type);

#if !defined(NO_8BIT) && !defined(NO_16BIT) /* possibility to use a 16->8 wrapper... */
  /* Last chance for 8bit output to profit from an optimized 16bit routine
     via the generic wrappers; not done for the dithered decoder types. */
  if(     fr->cpu_opts.type != ifuenf_dither
       && fr->cpu_opts.type != generic_dither
       && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
  {
    fr->synths.plain[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap;
    fr->synths.mono[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_mono;
    fr->synths.mono2stereo[r_1to1][f_8] = INT123_synth_1to1_8bit_wrap_m2s;
  }
#endif

#ifdef OPT_THE_DCT36
  INT123_dct36_choose(fr);
#endif

#ifdef OPT_DITHER
  /* Dither noise table is generated at run time. */
  if(decided && needs_dither && !INT123_frame_dither_init(fr))
  {
    if(NOQUIET) error("Dither noise setup failed!");
    return 0;
  }
#endif

  if(!decided)
  {
    if(NOQUIET) error("Could not set optimization!");
    return 0;
  }

  if(VERBOSE) fprintf(stderr, "Decoder: %s\n", label);
  return 1;
}
885
886
enum optdec INT123_dectype(const char* decoder)
887
0
{
888
0
  enum optdec dt;
889
0
  if(   (decoder == NULL)
890
0
     || (decoder[0] == 0) )
891
0
  return autodec;
892
893
0
  for(dt=autodec; dt<nodec; ++dt)
894
0
  if(!strcasecmp(decoder, decname[dt])) return dt;
895
896
0
  return nodec; /* If we found nothing... */
897
0
}
898
899
#ifdef OPT_MULTI

/* Storage for the list of decoders usable on the running CPU. It holds one
   empty slot per compiled-in decoder plus a terminating NULL, and is filled
   from the front by check_decoders(). Note that OPT_GENERIC_FLOAT adds a
   slot here although the full list below has no entry for it. */
static const char *mpg123_supported_decoder_list[] =
{
# ifdef OPT_SSE
  NULL,
# endif
# ifdef OPT_SSE_VINTAGE
  NULL,
# endif
# ifdef OPT_3DNOWEXT
  NULL,
# endif
# ifdef OPT_3DNOWEXT_VINTAGE
  NULL,
# endif
# ifdef OPT_3DNOW
  NULL,
# endif
# ifdef OPT_3DNOW_VINTAGE
  NULL,
# endif
# ifdef OPT_MMX
  NULL,
# endif
# ifdef OPT_I586
  NULL,
# endif
# ifdef OPT_I586_DITHER
  NULL,
# endif
# ifdef OPT_I486
  NULL,
# endif
# ifdef OPT_I386
  NULL,
# endif
# ifdef OPT_ALTIVEC
  NULL,
# endif
# ifdef OPT_AVX
  NULL,
# endif
# ifdef OPT_X86_64
  NULL,
# endif
# ifdef OPT_ARM
  NULL,
# endif
# ifdef OPT_NEON
  NULL,
# endif
# ifdef OPT_NEON64
  NULL,
# endif
# ifdef OPT_GENERIC_FLOAT
  NULL,
# endif
# ifdef OPT_GENERIC
  NULL,
# endif
# ifdef OPT_GENERIC_DITHER
  NULL,
# endif
  NULL
};
#endif
967
968
/* Names of all decoders compiled into this build, terminated by NULL. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
  dn_sse,
#endif
#ifdef OPT_SSE_VINTAGE
  dn_sse_vintage,
#endif
#ifdef OPT_3DNOWEXT
  dn_dreidnowext,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
  dn_dreidnowext_vintage,
#endif
#ifdef OPT_3DNOW
  dn_dreidnow,
#endif
#ifdef OPT_3DNOW_VINTAGE
  dn_dreidnow_vintage,
#endif
#ifdef OPT_MMX
  dn_mmx,
#endif
#ifdef OPT_I586
  dn_ifuenf,
#endif
#ifdef OPT_I586_DITHER
  dn_ifuenf_dither,
#endif
#ifdef OPT_I486
  dn_ivier,
#endif
#ifdef OPT_I386
  dn_idrei,
#endif
#ifdef OPT_ALTIVEC
  dn_altivec,
#endif
#ifdef OPT_AVX
  dn_avx,
#endif
#ifdef OPT_X86_64
  dn_x86_64,
#endif
#ifdef OPT_ARM
  dn_arm,
#endif
#ifdef OPT_NEON
  dn_neon,
#endif
#ifdef OPT_NEON64
  dn_neon64,
#endif
#ifdef OPT_GENERIC
  dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
  dn_generic_dither,
#endif
  NULL
};
1029
1030
/* Fill mpg123_supported_decoder_list with the names of the compiled-in
   decoders that can be used here, writing from the first slot on. Decoders
   with a CPU flag test are listed only if the test passes, the others
   unconditionally. Without OPT_MULTI this does nothing, since only the full
   list (one entry) is used in that mode. */
void check_decoders(void)
{
#ifdef OPT_MULTI
  const char **slot = mpg123_supported_decoder_list;
# ifdef OPT_CPU_FLAGS
  struct cpuflags cpu_flags;
  wrap_getcpuflags(&cpu_flags);
# endif
# ifdef OPT_X86
  if(cpu_i586(cpu_flags))
  {
    /* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
    if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#  ifdef OPT_SSE
    if(cpu_sse(cpu_flags)) { *slot++ = dn_sse; }
#  endif
#  ifdef OPT_SSE_VINTAGE
    if(cpu_sse(cpu_flags)) { *slot++ = dn_sse_vintage; }
#  endif
#  ifdef OPT_3DNOWEXT
    if(cpu_3dnowext(cpu_flags)) { *slot++ = dn_dreidnowext; }
#  endif
#  ifdef OPT_3DNOWEXT_VINTAGE
    if(cpu_3dnowext(cpu_flags)) { *slot++ = dn_dreidnowext_vintage; }
#  endif
#  ifdef OPT_3DNOW
    if(cpu_3dnow(cpu_flags)) { *slot++ = dn_dreidnow; }
#  endif
#  ifdef OPT_3DNOW_VINTAGE
    if(cpu_3dnow(cpu_flags)) { *slot++ = dn_dreidnow_vintage; }
#  endif
#  ifdef OPT_MMX
    if(cpu_mmx(cpu_flags)) { *slot++ = dn_mmx; }
#  endif
#  ifdef OPT_I586
    *slot++ = dn_ifuenf;
#  endif
#  ifdef OPT_I586_DITHER
    *slot++ = dn_ifuenf_dither;
#  endif
  }
# endif
  /* An i486 build is simply assumed to run on an i486 CPU. */
# ifdef OPT_I486
  *slot++ = dn_ivier;
# endif
# ifdef OPT_ALTIVEC
  *slot++ = dn_altivec;
# endif
  /* i386 works on every supported x86, generic on any CPU. */
# ifdef OPT_I386
  *slot++ = dn_idrei;
# endif
# ifdef OPT_AVX
  if(cpu_avx(cpu_flags)) { *slot++ = dn_avx; }
# endif
# ifdef OPT_X86_64
  *slot++ = dn_x86_64;
# endif
# ifdef OPT_ARM
  *slot++ = dn_arm;
# endif
# ifdef OPT_NEON
  if(cpu_neon(cpu_flags)) { *slot++ = dn_neon; }
# endif
# ifdef OPT_NEON64
  if(cpu_neon(cpu_flags)) { *slot++ = dn_neon64; }
# endif
# ifdef OPT_GENERIC
  *slot++ = dn_generic;
# endif
# ifdef OPT_GENERIC_DITHER
  *slot++ = dn_generic_dither;
# endif
#endif /* OPT_MULTI */
}
1109
1110
/* Give the name (decname[] entry) of the decoder type stored in the
   handle's cpu_opts.type, or NULL if no handle is given. */
const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
{
  return (mh != NULL) ? decname[mh->cpu_opts.type] : NULL;
}
1116
1117
0
/* Return the NULL-terminated list of all decoders compiled into this build. */
const char attribute_align_arg **mpg123_decoders(void)
{
  return mpg123_decoder_list;
}
1118
/* Return the list of decoders supported at runtime. check_decoders() is run
   first; with OPT_MULTI the list it fills is returned, otherwise the full
   list of compiled-in decoders. */
const char attribute_align_arg **mpg123_supported_decoders(void)
{
  check_decoders();
#ifndef OPT_MULTI
  return mpg123_decoder_list;
#else
  return mpg123_supported_decoder_list;
#endif
}