Coverage Report

Created: 2025-06-22 07:10

/src/libjpeg-turbo/simd/x86_64/jsimd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jsimd_x86_64.c
3
 *
4
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2024, D. R. Commander.
6
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7
 *
8
 * Based on the x86 SIMD extension for IJG JPEG library,
9
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11
 *
12
 * This file contains the interface between the "normal" portions
13
 * of the library and the SIMD implementations when running on a
14
 * 64-bit x86 architecture.
15
 */
16
17
#define JPEG_INTERNALS
18
#include "../../src/jinclude.h"
19
#include "../../src/jpeglib.h"
20
#include "../../src/jsimd.h"
21
#include "../../src/jdct.h"
22
#include "../../src/jsimddct.h"
23
#include "../jsimd.h"
24
25
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) evaluates to nonzero if the address ptr is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that
 * expression arguments (for example pointer arithmetic) are evaluated as a
 * whole before the cast and the shift, and the mask is built in size_t so
 * that it has the same width as the address being tested.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
33
34
/* Bitmask of enabled SIMD instruction sets (tested against the JSIMD_*
   flags).  The initial value ~0 is the "not yet initialized" sentinel that
   init_simd() checks for. */
static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
35
/* Nonzero lets jsimd_can_huff_encode_one_block() report SIMD Huffman
   encoding as usable; init_simd() clears it when the JSIMD_NOHUFFENC
   environment variable is "1". */
static THREAD_LOCAL unsigned int simd_huffman = 1;
36
37
/*
38
 * Check what SIMD accelerations are supported.
39
 */
40
LOCAL(void)
41
init_simd(void)
42
0
{
43
0
#ifndef NO_GETENV
44
0
  char env[2] = { 0 };
45
0
#endif
46
47
0
  if (simd_support != ~0U)
48
0
    return;
49
50
0
  simd_support = jpeg_simd_cpu_support();
51
52
0
#ifndef NO_GETENV
53
  /* Force different settings through environment variables */
54
0
  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55
0
    simd_support &= JSIMD_SSE2;
56
0
  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57
0
    simd_support &= JSIMD_AVX2;
58
0
  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
59
0
    simd_support = 0;
60
0
  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
61
0
    simd_huffman = 0;
62
0
#endif
63
0
}
64
65
GLOBAL(int)
66
jsimd_can_rgb_ycc(void)
67
0
{
68
0
  init_simd();
69
70
  /* The code is optimised for these values only */
71
0
  if (BITS_IN_JSAMPLE != 8)
72
0
    return 0;
73
0
  if (sizeof(JDIMENSION) != 4)
74
0
    return 0;
75
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
76
0
    return 0;
77
78
0
  if ((simd_support & JSIMD_AVX2) &&
79
0
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
80
0
    return 1;
81
0
  if ((simd_support & JSIMD_SSE2) &&
82
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
83
0
    return 1;
84
85
0
  return 0;
86
0
}
87
88
GLOBAL(int)
89
jsimd_can_rgb_gray(void)
90
0
{
91
0
  init_simd();
92
93
  /* The code is optimised for these values only */
94
0
  if (BITS_IN_JSAMPLE != 8)
95
0
    return 0;
96
0
  if (sizeof(JDIMENSION) != 4)
97
0
    return 0;
98
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99
0
    return 0;
100
101
0
  if ((simd_support & JSIMD_AVX2) &&
102
0
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
103
0
    return 1;
104
0
  if ((simd_support & JSIMD_SSE2) &&
105
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
106
0
    return 1;
107
108
0
  return 0;
109
0
}
110
111
GLOBAL(int)
112
jsimd_can_ycc_rgb(void)
113
0
{
114
0
  init_simd();
115
116
  /* The code is optimised for these values only */
117
0
  if (BITS_IN_JSAMPLE != 8)
118
0
    return 0;
119
0
  if (sizeof(JDIMENSION) != 4)
120
0
    return 0;
121
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122
0
    return 0;
123
124
0
  if ((simd_support & JSIMD_AVX2) &&
125
0
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
126
0
    return 1;
127
0
  if ((simd_support & JSIMD_SSE2) &&
128
0
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
129
0
    return 1;
130
131
0
  return 0;
132
0
}
133
134
GLOBAL(int)
135
jsimd_can_ycc_rgb565(void)
136
0
{
137
0
  return 0;
138
0
}
139
140
GLOBAL(void)
141
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
143
                      int num_rows)
144
0
{
145
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147
148
0
  if (simd_support == ~0U)
149
0
    init_simd();
150
151
0
  switch (cinfo->in_color_space) {
152
0
  case JCS_EXT_RGB:
153
0
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
154
0
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
155
0
    break;
156
0
  case JCS_EXT_RGBX:
157
0
  case JCS_EXT_RGBA:
158
0
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159
0
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
160
0
    break;
161
0
  case JCS_EXT_BGR:
162
0
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
163
0
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
164
0
    break;
165
0
  case JCS_EXT_BGRX:
166
0
  case JCS_EXT_BGRA:
167
0
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168
0
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
169
0
    break;
170
0
  case JCS_EXT_XBGR:
171
0
  case JCS_EXT_ABGR:
172
0
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173
0
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
174
0
    break;
175
0
  case JCS_EXT_XRGB:
176
0
  case JCS_EXT_ARGB:
177
0
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178
0
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
179
0
    break;
180
0
  default:
181
0
    avx2fct = jsimd_rgb_ycc_convert_avx2;
182
0
    sse2fct = jsimd_rgb_ycc_convert_sse2;
183
0
    break;
184
0
  }
185
186
0
  if (simd_support & JSIMD_AVX2)
187
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188
0
  else
189
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190
0
}
191
192
GLOBAL(void)
193
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
195
                       int num_rows)
196
0
{
197
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
199
200
0
  if (simd_support == ~0U)
201
0
    init_simd();
202
203
0
  switch (cinfo->in_color_space) {
204
0
  case JCS_EXT_RGB:
205
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
206
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
207
0
    break;
208
0
  case JCS_EXT_RGBX:
209
0
  case JCS_EXT_RGBA:
210
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
211
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
212
0
    break;
213
0
  case JCS_EXT_BGR:
214
0
    avx2fct = jsimd_extbgr_gray_convert_avx2;
215
0
    sse2fct = jsimd_extbgr_gray_convert_sse2;
216
0
    break;
217
0
  case JCS_EXT_BGRX:
218
0
  case JCS_EXT_BGRA:
219
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
220
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
221
0
    break;
222
0
  case JCS_EXT_XBGR:
223
0
  case JCS_EXT_ABGR:
224
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
225
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
226
0
    break;
227
0
  case JCS_EXT_XRGB:
228
0
  case JCS_EXT_ARGB:
229
0
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
230
0
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
231
0
    break;
232
0
  default:
233
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
234
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
235
0
    break;
236
0
  }
237
238
0
  if (simd_support & JSIMD_AVX2)
239
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240
0
  else
241
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242
0
}
243
244
GLOBAL(void)
245
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
246
                      JDIMENSION input_row, JSAMPARRAY output_buf,
247
                      int num_rows)
248
0
{
249
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251
252
0
  if (simd_support == ~0U)
253
0
    init_simd();
254
255
0
  switch (cinfo->out_color_space) {
256
0
  case JCS_EXT_RGB:
257
0
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
258
0
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
259
0
    break;
260
0
  case JCS_EXT_RGBX:
261
0
  case JCS_EXT_RGBA:
262
0
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
263
0
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
264
0
    break;
265
0
  case JCS_EXT_BGR:
266
0
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
267
0
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
268
0
    break;
269
0
  case JCS_EXT_BGRX:
270
0
  case JCS_EXT_BGRA:
271
0
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
272
0
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
273
0
    break;
274
0
  case JCS_EXT_XBGR:
275
0
  case JCS_EXT_ABGR:
276
0
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
277
0
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
278
0
    break;
279
0
  case JCS_EXT_XRGB:
280
0
  case JCS_EXT_ARGB:
281
0
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
282
0
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
283
0
    break;
284
0
  default:
285
0
    avx2fct = jsimd_ycc_rgb_convert_avx2;
286
0
    sse2fct = jsimd_ycc_rgb_convert_sse2;
287
0
    break;
288
0
  }
289
290
0
  if (simd_support & JSIMD_AVX2)
291
0
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292
0
  else
293
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
294
0
}
295
296
GLOBAL(void)
297
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298
                         JDIMENSION input_row, JSAMPARRAY output_buf,
299
                         int num_rows)
300
0
{
301
0
}
302
303
GLOBAL(int)
304
jsimd_can_h2v2_downsample(void)
305
0
{
306
0
  init_simd();
307
308
  /* The code is optimised for these values only */
309
0
  if (BITS_IN_JSAMPLE != 8)
310
0
    return 0;
311
0
  if (sizeof(JDIMENSION) != 4)
312
0
    return 0;
313
314
0
  if (simd_support & JSIMD_AVX2)
315
0
    return 1;
316
0
  if (simd_support & JSIMD_SSE2)
317
0
    return 1;
318
319
0
  return 0;
320
0
}
321
322
GLOBAL(int)
323
jsimd_can_h2v1_downsample(void)
324
0
{
325
0
  init_simd();
326
327
  /* The code is optimised for these values only */
328
0
  if (BITS_IN_JSAMPLE != 8)
329
0
    return 0;
330
0
  if (sizeof(JDIMENSION) != 4)
331
0
    return 0;
332
333
0
  if (simd_support & JSIMD_AVX2)
334
0
    return 1;
335
0
  if (simd_support & JSIMD_SSE2)
336
0
    return 1;
337
338
0
  return 0;
339
0
}
340
341
GLOBAL(void)
342
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
343
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
344
0
{
345
0
  if (simd_support == ~0U)
346
0
    init_simd();
347
348
0
  if (simd_support & JSIMD_AVX2)
349
0
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
350
0
                               compptr->v_samp_factor,
351
0
                               compptr->width_in_blocks, input_data,
352
0
                               output_data);
353
0
  else
354
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355
0
                               compptr->v_samp_factor,
356
0
                               compptr->width_in_blocks, input_data,
357
0
                               output_data);
358
0
}
359
360
GLOBAL(void)
361
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
362
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
363
0
{
364
0
  if (simd_support == ~0U)
365
0
    init_simd();
366
367
0
  if (simd_support & JSIMD_AVX2)
368
0
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
369
0
                               compptr->v_samp_factor,
370
0
                               compptr->width_in_blocks, input_data,
371
0
                               output_data);
372
0
  else
373
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
374
0
                               compptr->v_samp_factor,
375
0
                               compptr->width_in_blocks, input_data,
376
0
                               output_data);
377
0
}
378
379
GLOBAL(int)
380
jsimd_can_h2v2_upsample(void)
381
0
{
382
0
  init_simd();
383
384
  /* The code is optimised for these values only */
385
0
  if (BITS_IN_JSAMPLE != 8)
386
0
    return 0;
387
0
  if (sizeof(JDIMENSION) != 4)
388
0
    return 0;
389
390
0
  if (simd_support & JSIMD_AVX2)
391
0
    return 1;
392
0
  if (simd_support & JSIMD_SSE2)
393
0
    return 1;
394
395
0
  return 0;
396
0
}
397
398
GLOBAL(int)
399
jsimd_can_h2v1_upsample(void)
400
0
{
401
0
  init_simd();
402
403
  /* The code is optimised for these values only */
404
0
  if (BITS_IN_JSAMPLE != 8)
405
0
    return 0;
406
0
  if (sizeof(JDIMENSION) != 4)
407
0
    return 0;
408
409
0
  if (simd_support & JSIMD_AVX2)
410
0
    return 1;
411
0
  if (simd_support & JSIMD_SSE2)
412
0
    return 1;
413
414
0
  return 0;
415
0
}
416
417
GLOBAL(void)
418
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
420
0
{
421
0
  if (simd_support == ~0U)
422
0
    init_simd();
423
424
0
  if (simd_support & JSIMD_AVX2)
425
0
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
426
0
                             input_data, output_data_ptr);
427
0
  else
428
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429
0
                             input_data, output_data_ptr);
430
0
}
431
432
GLOBAL(void)
433
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
434
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
435
0
{
436
0
  if (simd_support == ~0U)
437
0
    init_simd();
438
439
0
  if (simd_support & JSIMD_AVX2)
440
0
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
441
0
                             input_data, output_data_ptr);
442
0
  else
443
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
444
0
                             input_data, output_data_ptr);
445
0
}
446
447
GLOBAL(int)
448
jsimd_can_h2v2_fancy_upsample(void)
449
0
{
450
0
  init_simd();
451
452
  /* The code is optimised for these values only */
453
0
  if (BITS_IN_JSAMPLE != 8)
454
0
    return 0;
455
0
  if (sizeof(JDIMENSION) != 4)
456
0
    return 0;
457
458
0
  if ((simd_support & JSIMD_AVX2) &&
459
0
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
460
0
    return 1;
461
0
  if ((simd_support & JSIMD_SSE2) &&
462
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463
0
    return 1;
464
465
0
  return 0;
466
0
}
467
468
GLOBAL(int)
469
jsimd_can_h2v1_fancy_upsample(void)
470
0
{
471
0
  init_simd();
472
473
  /* The code is optimised for these values only */
474
0
  if (BITS_IN_JSAMPLE != 8)
475
0
    return 0;
476
0
  if (sizeof(JDIMENSION) != 4)
477
0
    return 0;
478
479
0
  if ((simd_support & JSIMD_AVX2) &&
480
0
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
481
0
    return 1;
482
0
  if ((simd_support & JSIMD_SSE2) &&
483
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484
0
    return 1;
485
486
0
  return 0;
487
0
}
488
489
GLOBAL(void)
490
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492
0
{
493
0
  if (simd_support == ~0U)
494
0
    init_simd();
495
496
0
  if (simd_support & JSIMD_AVX2)
497
0
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
498
0
                                   compptr->downsampled_width, input_data,
499
0
                                   output_data_ptr);
500
0
  else
501
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
502
0
                                   compptr->downsampled_width, input_data,
503
0
                                   output_data_ptr);
504
0
}
505
506
GLOBAL(void)
507
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
508
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
509
0
{
510
0
  if (simd_support == ~0U)
511
0
    init_simd();
512
513
0
  if (simd_support & JSIMD_AVX2)
514
0
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
515
0
                                   compptr->downsampled_width, input_data,
516
0
                                   output_data_ptr);
517
0
  else
518
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
519
0
                                   compptr->downsampled_width, input_data,
520
0
                                   output_data_ptr);
521
0
}
522
523
GLOBAL(int)
524
jsimd_can_h2v2_merged_upsample(void)
525
0
{
526
0
  init_simd();
527
528
  /* The code is optimised for these values only */
529
0
  if (BITS_IN_JSAMPLE != 8)
530
0
    return 0;
531
0
  if (sizeof(JDIMENSION) != 4)
532
0
    return 0;
533
534
0
  if ((simd_support & JSIMD_AVX2) &&
535
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
536
0
    return 1;
537
0
  if ((simd_support & JSIMD_SSE2) &&
538
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
539
0
    return 1;
540
541
0
  return 0;
542
0
}
543
544
GLOBAL(int)
545
jsimd_can_h2v1_merged_upsample(void)
546
0
{
547
0
  init_simd();
548
549
  /* The code is optimised for these values only */
550
0
  if (BITS_IN_JSAMPLE != 8)
551
0
    return 0;
552
0
  if (sizeof(JDIMENSION) != 4)
553
0
    return 0;
554
555
0
  if ((simd_support & JSIMD_AVX2) &&
556
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
557
0
    return 1;
558
0
  if ((simd_support & JSIMD_SSE2) &&
559
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
560
0
    return 1;
561
562
0
  return 0;
563
0
}
564
565
GLOBAL(void)
566
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
567
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
568
0
{
569
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
570
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
571
572
0
  if (simd_support == ~0U)
573
0
    init_simd();
574
575
0
  switch (cinfo->out_color_space) {
576
0
  case JCS_EXT_RGB:
577
0
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
578
0
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
579
0
    break;
580
0
  case JCS_EXT_RGBX:
581
0
  case JCS_EXT_RGBA:
582
0
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
583
0
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
584
0
    break;
585
0
  case JCS_EXT_BGR:
586
0
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
587
0
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
588
0
    break;
589
0
  case JCS_EXT_BGRX:
590
0
  case JCS_EXT_BGRA:
591
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
592
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
593
0
    break;
594
0
  case JCS_EXT_XBGR:
595
0
  case JCS_EXT_ABGR:
596
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
597
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
598
0
    break;
599
0
  case JCS_EXT_XRGB:
600
0
  case JCS_EXT_ARGB:
601
0
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
602
0
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
603
0
    break;
604
0
  default:
605
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
606
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
607
0
    break;
608
0
  }
609
610
0
  if (simd_support & JSIMD_AVX2)
611
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612
0
  else
613
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
614
0
}
615
616
GLOBAL(void)
617
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
618
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
619
0
{
620
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
621
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
622
623
0
  if (simd_support == ~0U)
624
0
    init_simd();
625
626
0
  switch (cinfo->out_color_space) {
627
0
  case JCS_EXT_RGB:
628
0
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
629
0
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
630
0
    break;
631
0
  case JCS_EXT_RGBX:
632
0
  case JCS_EXT_RGBA:
633
0
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
634
0
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
635
0
    break;
636
0
  case JCS_EXT_BGR:
637
0
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
638
0
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
639
0
    break;
640
0
  case JCS_EXT_BGRX:
641
0
  case JCS_EXT_BGRA:
642
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
643
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
644
0
    break;
645
0
  case JCS_EXT_XBGR:
646
0
  case JCS_EXT_ABGR:
647
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
648
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
649
0
    break;
650
0
  case JCS_EXT_XRGB:
651
0
  case JCS_EXT_ARGB:
652
0
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
653
0
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
654
0
    break;
655
0
  default:
656
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
657
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
658
0
    break;
659
0
  }
660
661
0
  if (simd_support & JSIMD_AVX2)
662
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663
0
  else
664
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
665
0
}
666
667
GLOBAL(int)
668
jsimd_can_convsamp(void)
669
0
{
670
0
  init_simd();
671
672
  /* The code is optimised for these values only */
673
0
  if (DCTSIZE != 8)
674
0
    return 0;
675
0
  if (BITS_IN_JSAMPLE != 8)
676
0
    return 0;
677
0
  if (sizeof(JDIMENSION) != 4)
678
0
    return 0;
679
0
  if (sizeof(DCTELEM) != 2)
680
0
    return 0;
681
682
0
  if (simd_support & JSIMD_AVX2)
683
0
    return 1;
684
0
  if (simd_support & JSIMD_SSE2)
685
0
    return 1;
686
687
0
  return 0;
688
0
}
689
690
GLOBAL(int)
691
jsimd_can_convsamp_float(void)
692
0
{
693
0
  init_simd();
694
695
  /* The code is optimised for these values only */
696
0
  if (DCTSIZE != 8)
697
0
    return 0;
698
0
  if (BITS_IN_JSAMPLE != 8)
699
0
    return 0;
700
0
  if (sizeof(JDIMENSION) != 4)
701
0
    return 0;
702
0
  if (sizeof(FAST_FLOAT) != 4)
703
0
    return 0;
704
705
0
  if (simd_support & JSIMD_SSE2)
706
0
    return 1;
707
708
0
  return 0;
709
0
}
710
711
GLOBAL(void)
712
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
713
               DCTELEM *workspace)
714
0
{
715
0
  if (simd_support == ~0U)
716
0
    init_simd();
717
718
0
  if (simd_support & JSIMD_AVX2)
719
0
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
720
0
  else
721
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
722
0
}
723
724
GLOBAL(void)
725
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
726
                     FAST_FLOAT *workspace)
727
0
{
728
0
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
729
0
}
730
731
GLOBAL(int)
732
jsimd_can_fdct_islow(void)
733
0
{
734
0
  init_simd();
735
736
  /* The code is optimised for these values only */
737
0
  if (DCTSIZE != 8)
738
0
    return 0;
739
0
  if (sizeof(DCTELEM) != 2)
740
0
    return 0;
741
742
0
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
743
0
    return 1;
744
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
745
0
    return 1;
746
747
0
  return 0;
748
0
}
749
750
GLOBAL(int)
751
jsimd_can_fdct_ifast(void)
752
0
{
753
0
  init_simd();
754
755
  /* The code is optimised for these values only */
756
0
  if (DCTSIZE != 8)
757
0
    return 0;
758
0
  if (sizeof(DCTELEM) != 2)
759
0
    return 0;
760
761
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
762
0
    return 1;
763
764
0
  return 0;
765
0
}
766
767
GLOBAL(int)
768
jsimd_can_fdct_float(void)
769
0
{
770
0
  init_simd();
771
772
  /* The code is optimised for these values only */
773
0
  if (DCTSIZE != 8)
774
0
    return 0;
775
0
  if (sizeof(FAST_FLOAT) != 4)
776
0
    return 0;
777
778
0
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
779
0
    return 1;
780
781
0
  return 0;
782
0
}
783
784
GLOBAL(void)
785
jsimd_fdct_islow(DCTELEM *data)
786
0
{
787
0
  if (simd_support == ~0U)
788
0
    init_simd();
789
790
0
  if (simd_support & JSIMD_AVX2)
791
0
    jsimd_fdct_islow_avx2(data);
792
0
  else
793
0
    jsimd_fdct_islow_sse2(data);
794
0
}
795
796
GLOBAL(void)
797
jsimd_fdct_ifast(DCTELEM *data)
798
0
{
799
0
  jsimd_fdct_ifast_sse2(data);
800
0
}
801
802
GLOBAL(void)
803
jsimd_fdct_float(FAST_FLOAT *data)
804
0
{
805
0
  jsimd_fdct_float_sse(data);
806
0
}
807
808
GLOBAL(int)
809
jsimd_can_quantize(void)
810
0
{
811
0
  init_simd();
812
813
  /* The code is optimised for these values only */
814
0
  if (DCTSIZE != 8)
815
0
    return 0;
816
0
  if (sizeof(JCOEF) != 2)
817
0
    return 0;
818
0
  if (sizeof(DCTELEM) != 2)
819
0
    return 0;
820
821
0
  if (simd_support & JSIMD_AVX2)
822
0
    return 1;
823
0
  if (simd_support & JSIMD_SSE2)
824
0
    return 1;
825
826
0
  return 0;
827
0
}
828
829
GLOBAL(int)
830
jsimd_can_quantize_float(void)
831
0
{
832
0
  init_simd();
833
834
  /* The code is optimised for these values only */
835
0
  if (DCTSIZE != 8)
836
0
    return 0;
837
0
  if (sizeof(JCOEF) != 2)
838
0
    return 0;
839
0
  if (sizeof(FAST_FLOAT) != 4)
840
0
    return 0;
841
842
0
  if (simd_support & JSIMD_SSE2)
843
0
    return 1;
844
845
0
  return 0;
846
0
}
847
848
GLOBAL(void)
849
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
850
0
{
851
0
  if (simd_support == ~0U)
852
0
    init_simd();
853
854
0
  if (simd_support & JSIMD_AVX2)
855
0
    jsimd_quantize_avx2(coef_block, divisors, workspace);
856
0
  else
857
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
858
0
}
859
860
GLOBAL(void)
861
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
862
                     FAST_FLOAT *workspace)
863
0
{
864
0
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865
0
}
866
867
GLOBAL(int)
868
jsimd_can_idct_2x2(void)
869
0
{
870
0
  init_simd();
871
872
  /* The code is optimised for these values only */
873
0
  if (DCTSIZE != 8)
874
0
    return 0;
875
0
  if (sizeof(JCOEF) != 2)
876
0
    return 0;
877
0
  if (BITS_IN_JSAMPLE != 8)
878
0
    return 0;
879
0
  if (sizeof(JDIMENSION) != 4)
880
0
    return 0;
881
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
882
0
    return 0;
883
884
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
885
0
    return 1;
886
887
0
  return 0;
888
0
}
889
890
GLOBAL(int)
891
jsimd_can_idct_4x4(void)
892
0
{
893
0
  init_simd();
894
895
  /* The code is optimised for these values only */
896
0
  if (DCTSIZE != 8)
897
0
    return 0;
898
0
  if (sizeof(JCOEF) != 2)
899
0
    return 0;
900
0
  if (BITS_IN_JSAMPLE != 8)
901
0
    return 0;
902
0
  if (sizeof(JDIMENSION) != 4)
903
0
    return 0;
904
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
905
0
    return 0;
906
907
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
908
0
    return 1;
909
910
0
  return 0;
911
0
}
912
913
GLOBAL(void)
914
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
915
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
916
               JDIMENSION output_col)
917
0
{
918
0
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
919
0
}
920
921
GLOBAL(void)
922
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
923
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
924
               JDIMENSION output_col)
925
0
{
926
0
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927
0
}
928
929
GLOBAL(int)
930
jsimd_can_idct_islow(void)
931
0
{
932
0
  init_simd();
933
934
  /* The code is optimised for these values only */
935
0
  if (DCTSIZE != 8)
936
0
    return 0;
937
0
  if (sizeof(JCOEF) != 2)
938
0
    return 0;
939
0
  if (BITS_IN_JSAMPLE != 8)
940
0
    return 0;
941
0
  if (sizeof(JDIMENSION) != 4)
942
0
    return 0;
943
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
944
0
    return 0;
945
946
0
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
947
0
    return 1;
948
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
949
0
    return 1;
950
951
0
  return 0;
952
0
}
953
954
GLOBAL(int)
955
jsimd_can_idct_ifast(void)
956
0
{
957
0
  init_simd();
958
959
  /* The code is optimised for these values only */
960
0
  if (DCTSIZE != 8)
961
0
    return 0;
962
0
  if (sizeof(JCOEF) != 2)
963
0
    return 0;
964
0
  if (BITS_IN_JSAMPLE != 8)
965
0
    return 0;
966
0
  if (sizeof(JDIMENSION) != 4)
967
0
    return 0;
968
0
  if (sizeof(IFAST_MULT_TYPE) != 2)
969
0
    return 0;
970
0
  if (IFAST_SCALE_BITS != 2)
971
0
    return 0;
972
973
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
974
0
    return 1;
975
976
0
  return 0;
977
0
}
978
979
GLOBAL(int)
980
jsimd_can_idct_float(void)
981
0
{
982
0
  init_simd();
983
984
0
  if (DCTSIZE != 8)
985
0
    return 0;
986
0
  if (sizeof(JCOEF) != 2)
987
0
    return 0;
988
0
  if (BITS_IN_JSAMPLE != 8)
989
0
    return 0;
990
0
  if (sizeof(JDIMENSION) != 4)
991
0
    return 0;
992
0
  if (sizeof(FAST_FLOAT) != 4)
993
0
    return 0;
994
0
  if (sizeof(FLOAT_MULT_TYPE) != 4)
995
0
    return 0;
996
997
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
998
0
    return 1;
999
1000
0
  return 0;
1001
0
}
1002
1003
GLOBAL(void)
1004
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1005
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1006
                 JDIMENSION output_col)
1007
0
{
1008
0
  if (simd_support == ~0U)
1009
0
    init_simd();
1010
1011
0
  if (simd_support & JSIMD_AVX2)
1012
0
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1013
0
                          output_col);
1014
0
  else
1015
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1016
0
                          output_col);
1017
0
}
1018
1019
GLOBAL(void)
1020
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1021
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1022
                 JDIMENSION output_col)
1023
0
{
1024
0
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1025
0
                        output_col);
1026
0
}
1027
1028
GLOBAL(void)
1029
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1030
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1031
                 JDIMENSION output_col)
1032
0
{
1033
0
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1034
0
                        output_col);
1035
0
}
1036
1037
GLOBAL(int)
1038
jsimd_can_huff_encode_one_block(void)
1039
0
{
1040
0
  init_simd();
1041
1042
0
  if (DCTSIZE != 8)
1043
0
    return 0;
1044
0
  if (sizeof(JCOEF) != 2)
1045
0
    return 0;
1046
1047
0
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1048
0
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1049
0
    return 1;
1050
1051
0
  return 0;
1052
0
}
1053
1054
/*
 * Forward unconditionally to jsimd_huff_encode_one_block_sse2(), passing all
 * arguments unchanged, and return its result.  simd_support is not
 * consulted here.
 */
GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
                            int last_dc_val, c_derived_tbl *dctbl,
                            c_derived_tbl *actbl)
{
  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
                                          dctbl, actbl);
}
1062
1063
GLOBAL(int)
1064
jsimd_can_encode_mcu_AC_first_prepare(void)
1065
0
{
1066
0
  init_simd();
1067
1068
0
  if (DCTSIZE != 8)
1069
0
    return 0;
1070
0
  if (sizeof(JCOEF) != 2)
1071
0
    return 0;
1072
0
  if (simd_support & JSIMD_SSE2)
1073
0
    return 1;
1074
1075
0
  return 0;
1076
0
}
1077
1078
GLOBAL(void)
1079
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1080
                                  const int *jpeg_natural_order_start, int Sl,
1081
                                  int Al, UJCOEF *values, size_t *zerobits)
1082
0
{
1083
0
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1084
0
                                         Sl, Al, values, zerobits);
1085
0
}
1086
1087
GLOBAL(int)
1088
jsimd_can_encode_mcu_AC_refine_prepare(void)
1089
0
{
1090
0
  init_simd();
1091
1092
0
  if (DCTSIZE != 8)
1093
0
    return 0;
1094
0
  if (sizeof(JCOEF) != 2)
1095
0
    return 0;
1096
0
  if (simd_support & JSIMD_SSE2)
1097
0
    return 1;
1098
1099
0
  return 0;
1100
0
}
1101
1102
GLOBAL(int)
1103
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1104
                                   const int *jpeg_natural_order_start, int Sl,
1105
                                   int Al, UJCOEF *absvalues, size_t *bits)
1106
0
{
1107
0
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1108
0
                                                 jpeg_natural_order_start,
1109
0
                                                 Sl, Al, absvalues, bits);
1110
0
}