Coverage Report

Created: 2025-08-28 06:31

/src/libjpeg-turbo/simd/x86_64/jsimd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
3
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2024, D. R. Commander.
4
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
5
 *
6
 * Based on the x86 SIMD extension for IJG JPEG library,
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
9
 *
10
 * This file contains the interface between the "normal" portions
11
 * of the library and the SIMD implementations when running on a
12
 * 64-bit x86 architecture.
13
 */
14
15
#define JPEG_INTERNALS
16
#include "../../src/jinclude.h"
17
#include "../../src/jpeglib.h"
18
#include "../../src/jsimd.h"
19
#include "../../src/jdct.h"
20
#include "../../src/jsimddct.h"
21
#include "../jsimd.h"
22
23
/*
24
 * In the PIC cases, we have no guarantee that constants will keep
25
 * their alignment. This macro allows us to verify it at runtime.
26
 */
27
0
/*
 * Evaluates to nonzero when the address value `ptr` is a multiple of
 * 2^order bytes.  Both arguments are fully parenthesized so that compound
 * expressions can be passed safely, and the mask is built in size_t rather
 * than int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
31
32
/*
 * Bitmask of usable SIMD instruction sets.  The all-ones value is the
 * "not yet probed" sentinel that init_simd() looks for.
 */
static THREAD_LOCAL unsigned int simd_support = ~0U;
/* Nonzero while the SIMD Huffman encoder may be selected. */
static THREAD_LOCAL unsigned int simd_huffman = 1;
34
35
/*
36
 * Check what SIMD accelerations are supported.
37
 */
38
LOCAL(void)
39
init_simd(void)
40
3.24k
{
41
3.24k
#ifndef NO_GETENV
42
3.24k
  char env[2] = { 0 };
43
3.24k
#endif
44
45
3.24k
  if (simd_support != ~0U)
46
3.24k
    return;
47
48
1
  simd_support = jpeg_simd_cpu_support();
49
50
1
#ifndef NO_GETENV
51
  /* Force different settings through environment variables */
52
1
  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
53
0
    simd_support &= JSIMD_SSE2;
54
1
  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
55
0
    simd_support &= JSIMD_AVX2;
56
1
  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
57
1
    simd_support = 0;
58
1
  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
59
0
    simd_huffman = 0;
60
1
#endif
61
1
}
62
63
GLOBAL(int)
64
jsimd_can_rgb_ycc(void)
65
0
{
66
0
  init_simd();
67
68
  /* The code is optimised for these values only */
69
0
  if (BITS_IN_JSAMPLE != 8)
70
0
    return 0;
71
0
  if (sizeof(JDIMENSION) != 4)
72
0
    return 0;
73
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
74
0
    return 0;
75
76
0
  if ((simd_support & JSIMD_AVX2) &&
77
0
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
78
0
    return 1;
79
0
  if ((simd_support & JSIMD_SSE2) &&
80
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
81
0
    return 1;
82
83
0
  return 0;
84
0
}
85
86
GLOBAL(int)
87
jsimd_can_rgb_gray(void)
88
0
{
89
0
  init_simd();
90
91
  /* The code is optimised for these values only */
92
0
  if (BITS_IN_JSAMPLE != 8)
93
0
    return 0;
94
0
  if (sizeof(JDIMENSION) != 4)
95
0
    return 0;
96
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
97
0
    return 0;
98
99
0
  if ((simd_support & JSIMD_AVX2) &&
100
0
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
101
0
    return 1;
102
0
  if ((simd_support & JSIMD_SSE2) &&
103
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
104
0
    return 1;
105
106
0
  return 0;
107
0
}
108
109
GLOBAL(int)
110
jsimd_can_ycc_rgb(void)
111
468
{
112
468
  init_simd();
113
114
  /* The code is optimised for these values only */
115
468
  if (BITS_IN_JSAMPLE != 8)
116
0
    return 0;
117
468
  if (sizeof(JDIMENSION) != 4)
118
0
    return 0;
119
468
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
120
0
    return 0;
121
122
468
  if ((simd_support & JSIMD_AVX2) &&
123
468
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
124
0
    return 1;
125
468
  if ((simd_support & JSIMD_SSE2) &&
126
468
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
127
0
    return 1;
128
129
468
  return 0;
130
468
}
131
132
GLOBAL(int)
133
jsimd_can_ycc_rgb565(void)
134
0
{
135
0
  return 0;
136
0
}
137
138
GLOBAL(void)
139
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
140
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
141
                      int num_rows)
142
0
{
143
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
144
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
145
146
0
  if (simd_support == ~0U)
147
0
    init_simd();
148
149
0
  switch (cinfo->in_color_space) {
150
0
  case JCS_EXT_RGB:
151
0
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
152
0
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
153
0
    break;
154
0
  case JCS_EXT_RGBX:
155
0
  case JCS_EXT_RGBA:
156
0
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
157
0
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
158
0
    break;
159
0
  case JCS_EXT_BGR:
160
0
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
161
0
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
162
0
    break;
163
0
  case JCS_EXT_BGRX:
164
0
  case JCS_EXT_BGRA:
165
0
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
166
0
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
167
0
    break;
168
0
  case JCS_EXT_XBGR:
169
0
  case JCS_EXT_ABGR:
170
0
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
171
0
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
172
0
    break;
173
0
  case JCS_EXT_XRGB:
174
0
  case JCS_EXT_ARGB:
175
0
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
176
0
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
177
0
    break;
178
0
  default:
179
0
    avx2fct = jsimd_rgb_ycc_convert_avx2;
180
0
    sse2fct = jsimd_rgb_ycc_convert_sse2;
181
0
    break;
182
0
  }
183
184
0
  if (simd_support & JSIMD_AVX2)
185
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
186
0
  else
187
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188
0
}
189
190
GLOBAL(void)
191
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
192
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
193
                       int num_rows)
194
0
{
195
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
197
198
0
  if (simd_support == ~0U)
199
0
    init_simd();
200
201
0
  switch (cinfo->in_color_space) {
202
0
  case JCS_EXT_RGB:
203
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
204
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
205
0
    break;
206
0
  case JCS_EXT_RGBX:
207
0
  case JCS_EXT_RGBA:
208
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
209
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
210
0
    break;
211
0
  case JCS_EXT_BGR:
212
0
    avx2fct = jsimd_extbgr_gray_convert_avx2;
213
0
    sse2fct = jsimd_extbgr_gray_convert_sse2;
214
0
    break;
215
0
  case JCS_EXT_BGRX:
216
0
  case JCS_EXT_BGRA:
217
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
218
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
219
0
    break;
220
0
  case JCS_EXT_XBGR:
221
0
  case JCS_EXT_ABGR:
222
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
223
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
224
0
    break;
225
0
  case JCS_EXT_XRGB:
226
0
  case JCS_EXT_ARGB:
227
0
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
228
0
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
229
0
    break;
230
0
  default:
231
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
232
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
233
0
    break;
234
0
  }
235
236
0
  if (simd_support & JSIMD_AVX2)
237
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
238
0
  else
239
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240
0
}
241
242
GLOBAL(void)
243
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
244
                      JDIMENSION input_row, JSAMPARRAY output_buf,
245
                      int num_rows)
246
0
{
247
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
248
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
249
250
0
  if (simd_support == ~0U)
251
0
    init_simd();
252
253
0
  switch (cinfo->out_color_space) {
254
0
  case JCS_EXT_RGB:
255
0
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
256
0
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
257
0
    break;
258
0
  case JCS_EXT_RGBX:
259
0
  case JCS_EXT_RGBA:
260
0
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
261
0
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
262
0
    break;
263
0
  case JCS_EXT_BGR:
264
0
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
265
0
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
266
0
    break;
267
0
  case JCS_EXT_BGRX:
268
0
  case JCS_EXT_BGRA:
269
0
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
270
0
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
271
0
    break;
272
0
  case JCS_EXT_XBGR:
273
0
  case JCS_EXT_ABGR:
274
0
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
275
0
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
276
0
    break;
277
0
  case JCS_EXT_XRGB:
278
0
  case JCS_EXT_ARGB:
279
0
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
280
0
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
281
0
    break;
282
0
  default:
283
0
    avx2fct = jsimd_ycc_rgb_convert_avx2;
284
0
    sse2fct = jsimd_ycc_rgb_convert_sse2;
285
0
    break;
286
0
  }
287
288
0
  if (simd_support & JSIMD_AVX2)
289
0
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290
0
  else
291
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292
0
}
293
294
GLOBAL(void)
295
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
296
                         JDIMENSION input_row, JSAMPARRAY output_buf,
297
                         int num_rows)
298
0
{
299
0
}
300
301
GLOBAL(int)
302
jsimd_can_h2v2_downsample(void)
303
0
{
304
0
  init_simd();
305
306
  /* The code is optimised for these values only */
307
0
  if (BITS_IN_JSAMPLE != 8)
308
0
    return 0;
309
0
  if (sizeof(JDIMENSION) != 4)
310
0
    return 0;
311
312
0
  if (simd_support & JSIMD_AVX2)
313
0
    return 1;
314
0
  if (simd_support & JSIMD_SSE2)
315
0
    return 1;
316
317
0
  return 0;
318
0
}
319
320
GLOBAL(int)
321
jsimd_can_h2v1_downsample(void)
322
0
{
323
0
  init_simd();
324
325
  /* The code is optimised for these values only */
326
0
  if (BITS_IN_JSAMPLE != 8)
327
0
    return 0;
328
0
  if (sizeof(JDIMENSION) != 4)
329
0
    return 0;
330
331
0
  if (simd_support & JSIMD_AVX2)
332
0
    return 1;
333
0
  if (simd_support & JSIMD_SSE2)
334
0
    return 1;
335
336
0
  return 0;
337
0
}
338
339
GLOBAL(void)
340
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
341
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
342
0
{
343
0
  if (simd_support == ~0U)
344
0
    init_simd();
345
346
0
  if (simd_support & JSIMD_AVX2)
347
0
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
348
0
                               compptr->v_samp_factor,
349
0
                               compptr->width_in_blocks, input_data,
350
0
                               output_data);
351
0
  else
352
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
353
0
                               compptr->v_samp_factor,
354
0
                               compptr->width_in_blocks, input_data,
355
0
                               output_data);
356
0
}
357
358
GLOBAL(void)
359
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
360
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
361
0
{
362
0
  if (simd_support == ~0U)
363
0
    init_simd();
364
365
0
  if (simd_support & JSIMD_AVX2)
366
0
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
367
0
                               compptr->v_samp_factor,
368
0
                               compptr->width_in_blocks, input_data,
369
0
                               output_data);
370
0
  else
371
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
372
0
                               compptr->v_samp_factor,
373
0
                               compptr->width_in_blocks, input_data,
374
0
                               output_data);
375
0
}
376
377
GLOBAL(int)
378
jsimd_can_h2v2_upsample(void)
379
240
{
380
240
  init_simd();
381
382
  /* The code is optimised for these values only */
383
240
  if (BITS_IN_JSAMPLE != 8)
384
0
    return 0;
385
240
  if (sizeof(JDIMENSION) != 4)
386
0
    return 0;
387
388
240
  if (simd_support & JSIMD_AVX2)
389
0
    return 1;
390
240
  if (simd_support & JSIMD_SSE2)
391
0
    return 1;
392
393
240
  return 0;
394
240
}
395
396
GLOBAL(int)
397
jsimd_can_h2v1_upsample(void)
398
279
{
399
279
  init_simd();
400
401
  /* The code is optimised for these values only */
402
279
  if (BITS_IN_JSAMPLE != 8)
403
0
    return 0;
404
279
  if (sizeof(JDIMENSION) != 4)
405
0
    return 0;
406
407
279
  if (simd_support & JSIMD_AVX2)
408
0
    return 1;
409
279
  if (simd_support & JSIMD_SSE2)
410
0
    return 1;
411
412
279
  return 0;
413
279
}
414
415
GLOBAL(void)
416
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
417
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
418
0
{
419
0
  if (simd_support == ~0U)
420
0
    init_simd();
421
422
0
  if (simd_support & JSIMD_AVX2)
423
0
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
424
0
                             input_data, output_data_ptr);
425
0
  else
426
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
427
0
                             input_data, output_data_ptr);
428
0
}
429
430
GLOBAL(void)
431
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
432
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
433
0
{
434
0
  if (simd_support == ~0U)
435
0
    init_simd();
436
437
0
  if (simd_support & JSIMD_AVX2)
438
0
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
439
0
                             input_data, output_data_ptr);
440
0
  else
441
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
442
0
                             input_data, output_data_ptr);
443
0
}
444
445
GLOBAL(int)
446
jsimd_can_h2v2_fancy_upsample(void)
447
154
{
448
154
  init_simd();
449
450
  /* The code is optimised for these values only */
451
154
  if (BITS_IN_JSAMPLE != 8)
452
0
    return 0;
453
154
  if (sizeof(JDIMENSION) != 4)
454
0
    return 0;
455
456
154
  if ((simd_support & JSIMD_AVX2) &&
457
154
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
458
0
    return 1;
459
154
  if ((simd_support & JSIMD_SSE2) &&
460
154
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
461
0
    return 1;
462
463
154
  return 0;
464
154
}
465
466
GLOBAL(int)
467
jsimd_can_h2v1_fancy_upsample(void)
468
204
{
469
204
  init_simd();
470
471
  /* The code is optimised for these values only */
472
204
  if (BITS_IN_JSAMPLE != 8)
473
0
    return 0;
474
204
  if (sizeof(JDIMENSION) != 4)
475
0
    return 0;
476
477
204
  if ((simd_support & JSIMD_AVX2) &&
478
204
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
479
0
    return 1;
480
204
  if ((simd_support & JSIMD_SSE2) &&
481
204
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
482
0
    return 1;
483
484
204
  return 0;
485
204
}
486
487
GLOBAL(void)
488
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
489
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
490
0
{
491
0
  if (simd_support == ~0U)
492
0
    init_simd();
493
494
0
  if (simd_support & JSIMD_AVX2)
495
0
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
496
0
                                   compptr->downsampled_width, input_data,
497
0
                                   output_data_ptr);
498
0
  else
499
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
500
0
                                   compptr->downsampled_width, input_data,
501
0
                                   output_data_ptr);
502
0
}
503
504
GLOBAL(void)
505
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
506
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
507
0
{
508
0
  if (simd_support == ~0U)
509
0
    init_simd();
510
511
0
  if (simd_support & JSIMD_AVX2)
512
0
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
513
0
                                   compptr->downsampled_width, input_data,
514
0
                                   output_data_ptr);
515
0
  else
516
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
517
0
                                   compptr->downsampled_width, input_data,
518
0
                                   output_data_ptr);
519
0
}
520
521
GLOBAL(int)
522
jsimd_can_h2v2_merged_upsample(void)
523
0
{
524
0
  init_simd();
525
526
  /* The code is optimised for these values only */
527
0
  if (BITS_IN_JSAMPLE != 8)
528
0
    return 0;
529
0
  if (sizeof(JDIMENSION) != 4)
530
0
    return 0;
531
532
0
  if ((simd_support & JSIMD_AVX2) &&
533
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
534
0
    return 1;
535
0
  if ((simd_support & JSIMD_SSE2) &&
536
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
537
0
    return 1;
538
539
0
  return 0;
540
0
}
541
542
GLOBAL(int)
543
jsimd_can_h2v1_merged_upsample(void)
544
0
{
545
0
  init_simd();
546
547
  /* The code is optimised for these values only */
548
0
  if (BITS_IN_JSAMPLE != 8)
549
0
    return 0;
550
0
  if (sizeof(JDIMENSION) != 4)
551
0
    return 0;
552
553
0
  if ((simd_support & JSIMD_AVX2) &&
554
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
555
0
    return 1;
556
0
  if ((simd_support & JSIMD_SSE2) &&
557
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
558
0
    return 1;
559
560
0
  return 0;
561
0
}
562
563
GLOBAL(void)
564
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
565
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
566
0
{
567
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
568
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
569
570
0
  if (simd_support == ~0U)
571
0
    init_simd();
572
573
0
  switch (cinfo->out_color_space) {
574
0
  case JCS_EXT_RGB:
575
0
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
576
0
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
577
0
    break;
578
0
  case JCS_EXT_RGBX:
579
0
  case JCS_EXT_RGBA:
580
0
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
581
0
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
582
0
    break;
583
0
  case JCS_EXT_BGR:
584
0
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
585
0
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
586
0
    break;
587
0
  case JCS_EXT_BGRX:
588
0
  case JCS_EXT_BGRA:
589
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
590
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
591
0
    break;
592
0
  case JCS_EXT_XBGR:
593
0
  case JCS_EXT_ABGR:
594
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
595
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
596
0
    break;
597
0
  case JCS_EXT_XRGB:
598
0
  case JCS_EXT_ARGB:
599
0
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
600
0
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
601
0
    break;
602
0
  default:
603
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
604
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
605
0
    break;
606
0
  }
607
608
0
  if (simd_support & JSIMD_AVX2)
609
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
610
0
  else
611
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612
0
}
613
614
GLOBAL(void)
615
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
616
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
617
0
{
618
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
619
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
620
621
0
  if (simd_support == ~0U)
622
0
    init_simd();
623
624
0
  switch (cinfo->out_color_space) {
625
0
  case JCS_EXT_RGB:
626
0
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
627
0
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
628
0
    break;
629
0
  case JCS_EXT_RGBX:
630
0
  case JCS_EXT_RGBA:
631
0
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
632
0
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
633
0
    break;
634
0
  case JCS_EXT_BGR:
635
0
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
636
0
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
637
0
    break;
638
0
  case JCS_EXT_BGRX:
639
0
  case JCS_EXT_BGRA:
640
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
641
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
642
0
    break;
643
0
  case JCS_EXT_XBGR:
644
0
  case JCS_EXT_ABGR:
645
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
646
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
647
0
    break;
648
0
  case JCS_EXT_XRGB:
649
0
  case JCS_EXT_ARGB:
650
0
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
651
0
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
652
0
    break;
653
0
  default:
654
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
655
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
656
0
    break;
657
0
  }
658
659
0
  if (simd_support & JSIMD_AVX2)
660
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
661
0
  else
662
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663
0
}
664
665
GLOBAL(int)
666
jsimd_can_convsamp(void)
667
0
{
668
0
  init_simd();
669
670
  /* The code is optimised for these values only */
671
0
  if (DCTSIZE != 8)
672
0
    return 0;
673
0
  if (BITS_IN_JSAMPLE != 8)
674
0
    return 0;
675
0
  if (sizeof(JDIMENSION) != 4)
676
0
    return 0;
677
0
  if (sizeof(DCTELEM) != 2)
678
0
    return 0;
679
680
0
  if (simd_support & JSIMD_AVX2)
681
0
    return 1;
682
0
  if (simd_support & JSIMD_SSE2)
683
0
    return 1;
684
685
0
  return 0;
686
0
}
687
688
GLOBAL(int)
689
jsimd_can_convsamp_float(void)
690
0
{
691
0
  init_simd();
692
693
  /* The code is optimised for these values only */
694
0
  if (DCTSIZE != 8)
695
0
    return 0;
696
0
  if (BITS_IN_JSAMPLE != 8)
697
0
    return 0;
698
0
  if (sizeof(JDIMENSION) != 4)
699
0
    return 0;
700
0
  if (sizeof(FAST_FLOAT) != 4)
701
0
    return 0;
702
703
0
  if (simd_support & JSIMD_SSE2)
704
0
    return 1;
705
706
0
  return 0;
707
0
}
708
709
GLOBAL(void)
710
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
711
               DCTELEM *workspace)
712
0
{
713
0
  if (simd_support == ~0U)
714
0
    init_simd();
715
716
0
  if (simd_support & JSIMD_AVX2)
717
0
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
718
0
  else
719
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
720
0
}
721
722
GLOBAL(void)
723
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
724
                     FAST_FLOAT *workspace)
725
0
{
726
0
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
727
0
}
728
729
GLOBAL(int)
730
jsimd_can_fdct_islow(void)
731
0
{
732
0
  init_simd();
733
734
  /* The code is optimised for these values only */
735
0
  if (DCTSIZE != 8)
736
0
    return 0;
737
0
  if (sizeof(DCTELEM) != 2)
738
0
    return 0;
739
740
0
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
741
0
    return 1;
742
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
743
0
    return 1;
744
745
0
  return 0;
746
0
}
747
748
GLOBAL(int)
749
jsimd_can_fdct_ifast(void)
750
0
{
751
0
  init_simd();
752
753
  /* The code is optimised for these values only */
754
0
  if (DCTSIZE != 8)
755
0
    return 0;
756
0
  if (sizeof(DCTELEM) != 2)
757
0
    return 0;
758
759
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
760
0
    return 1;
761
762
0
  return 0;
763
0
}
764
765
GLOBAL(int)
766
jsimd_can_fdct_float(void)
767
0
{
768
0
  init_simd();
769
770
  /* The code is optimised for these values only */
771
0
  if (DCTSIZE != 8)
772
0
    return 0;
773
0
  if (sizeof(FAST_FLOAT) != 4)
774
0
    return 0;
775
776
0
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
777
0
    return 1;
778
779
0
  return 0;
780
0
}
781
782
GLOBAL(void)
783
jsimd_fdct_islow(DCTELEM *data)
784
0
{
785
0
  if (simd_support == ~0U)
786
0
    init_simd();
787
788
0
  if (simd_support & JSIMD_AVX2)
789
0
    jsimd_fdct_islow_avx2(data);
790
0
  else
791
0
    jsimd_fdct_islow_sse2(data);
792
0
}
793
794
GLOBAL(void)
795
jsimd_fdct_ifast(DCTELEM *data)
796
0
{
797
0
  jsimd_fdct_ifast_sse2(data);
798
0
}
799
800
GLOBAL(void)
801
jsimd_fdct_float(FAST_FLOAT *data)
802
0
{
803
0
  jsimd_fdct_float_sse(data);
804
0
}
805
806
GLOBAL(int)
807
jsimd_can_quantize(void)
808
0
{
809
0
  init_simd();
810
811
  /* The code is optimised for these values only */
812
0
  if (DCTSIZE != 8)
813
0
    return 0;
814
0
  if (sizeof(JCOEF) != 2)
815
0
    return 0;
816
0
  if (sizeof(DCTELEM) != 2)
817
0
    return 0;
818
819
0
  if (simd_support & JSIMD_AVX2)
820
0
    return 1;
821
0
  if (simd_support & JSIMD_SSE2)
822
0
    return 1;
823
824
0
  return 0;
825
0
}
826
827
GLOBAL(int)
828
jsimd_can_quantize_float(void)
829
0
{
830
0
  init_simd();
831
832
  /* The code is optimised for these values only */
833
0
  if (DCTSIZE != 8)
834
0
    return 0;
835
0
  if (sizeof(JCOEF) != 2)
836
0
    return 0;
837
0
  if (sizeof(FAST_FLOAT) != 4)
838
0
    return 0;
839
840
0
  if (simd_support & JSIMD_SSE2)
841
0
    return 1;
842
843
0
  return 0;
844
0
}
845
846
GLOBAL(void)
847
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
848
0
{
849
0
  if (simd_support == ~0U)
850
0
    init_simd();
851
852
0
  if (simd_support & JSIMD_AVX2)
853
0
    jsimd_quantize_avx2(coef_block, divisors, workspace);
854
0
  else
855
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
856
0
}
857
858
GLOBAL(void)
859
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
860
                     FAST_FLOAT *workspace)
861
0
{
862
0
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
863
0
}
864
865
GLOBAL(int)
866
jsimd_can_idct_2x2(void)
867
0
{
868
0
  init_simd();
869
870
  /* The code is optimised for these values only */
871
0
  if (DCTSIZE != 8)
872
0
    return 0;
873
0
  if (sizeof(JCOEF) != 2)
874
0
    return 0;
875
0
  if (BITS_IN_JSAMPLE != 8)
876
0
    return 0;
877
0
  if (sizeof(JDIMENSION) != 4)
878
0
    return 0;
879
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
880
0
    return 0;
881
882
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
883
0
    return 1;
884
885
0
  return 0;
886
0
}
887
888
GLOBAL(int)
889
jsimd_can_idct_4x4(void)
890
0
{
891
0
  init_simd();
892
893
  /* The code is optimised for these values only */
894
0
  if (DCTSIZE != 8)
895
0
    return 0;
896
0
  if (sizeof(JCOEF) != 2)
897
0
    return 0;
898
0
  if (BITS_IN_JSAMPLE != 8)
899
0
    return 0;
900
0
  if (sizeof(JDIMENSION) != 4)
901
0
    return 0;
902
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
903
0
    return 0;
904
905
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
906
0
    return 1;
907
908
0
  return 0;
909
0
}
910
911
GLOBAL(void)
912
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
913
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
914
               JDIMENSION output_col)
915
0
{
916
0
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
917
0
}
918
919
GLOBAL(void)
920
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
921
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
922
               JDIMENSION output_col)
923
0
{
924
0
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
925
0
}
926
927
GLOBAL(int)
928
jsimd_can_idct_islow(void)
929
1.90k
{
930
1.90k
  init_simd();
931
932
  /* The code is optimised for these values only */
933
1.90k
  if (DCTSIZE != 8)
934
0
    return 0;
935
1.90k
  if (sizeof(JCOEF) != 2)
936
0
    return 0;
937
1.90k
  if (BITS_IN_JSAMPLE != 8)
938
0
    return 0;
939
1.90k
  if (sizeof(JDIMENSION) != 4)
940
0
    return 0;
941
1.90k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
942
0
    return 0;
943
944
1.90k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
945
0
    return 1;
946
1.90k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
947
0
    return 1;
948
949
1.90k
  return 0;
950
1.90k
}
951
952
GLOBAL(int)
953
jsimd_can_idct_ifast(void)
954
0
{
955
0
  init_simd();
956
957
  /* The code is optimised for these values only */
958
0
  if (DCTSIZE != 8)
959
0
    return 0;
960
0
  if (sizeof(JCOEF) != 2)
961
0
    return 0;
962
0
  if (BITS_IN_JSAMPLE != 8)
963
0
    return 0;
964
0
  if (sizeof(JDIMENSION) != 4)
965
0
    return 0;
966
0
  if (sizeof(IFAST_MULT_TYPE) != 2)
967
0
    return 0;
968
0
  if (IFAST_SCALE_BITS != 2)
969
0
    return 0;
970
971
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
972
0
    return 1;
973
974
0
  return 0;
975
0
}
976
977
GLOBAL(int)
978
jsimd_can_idct_float(void)
979
0
{
980
0
  init_simd();
981
982
0
  if (DCTSIZE != 8)
983
0
    return 0;
984
0
  if (sizeof(JCOEF) != 2)
985
0
    return 0;
986
0
  if (BITS_IN_JSAMPLE != 8)
987
0
    return 0;
988
0
  if (sizeof(JDIMENSION) != 4)
989
0
    return 0;
990
0
  if (sizeof(FAST_FLOAT) != 4)
991
0
    return 0;
992
0
  if (sizeof(FLOAT_MULT_TYPE) != 4)
993
0
    return 0;
994
995
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
996
0
    return 1;
997
998
0
  return 0;
999
0
}
1000
1001
GLOBAL(void)
1002
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1003
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1004
                 JDIMENSION output_col)
1005
0
{
1006
0
  if (simd_support == ~0U)
1007
0
    init_simd();
1008
1009
0
  if (simd_support & JSIMD_AVX2)
1010
0
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1011
0
                          output_col);
1012
0
  else
1013
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1014
0
                          output_col);
1015
0
}
1016
1017
GLOBAL(void)
1018
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1019
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1020
                 JDIMENSION output_col)
1021
0
{
1022
0
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1023
0
                        output_col);
1024
0
}
1025
1026
GLOBAL(void)
1027
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1028
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1029
                 JDIMENSION output_col)
1030
0
{
1031
0
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1032
0
                        output_col);
1033
0
}
1034
1035
GLOBAL(int)
1036
jsimd_can_huff_encode_one_block(void)
1037
0
{
1038
0
  init_simd();
1039
1040
0
  if (DCTSIZE != 8)
1041
0
    return 0;
1042
0
  if (sizeof(JCOEF) != 2)
1043
0
    return 0;
1044
1045
0
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1046
0
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1047
0
    return 1;
1048
1049
0
  return 0;
1050
0
}
1051
1052
/*
 * Thin wrapper: passes every argument to the SSE2 Huffman block encoder and
 * returns the JOCTET pointer it produces.
 */
GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
                            int last_dc_val, c_derived_tbl *dctbl,
                            c_derived_tbl *actbl)
{
  JOCTET *result;

  result = jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
                                            dctbl, actbl);
  return result;
}
1060
1061
GLOBAL(int)
1062
jsimd_can_encode_mcu_AC_first_prepare(void)
1063
0
{
1064
0
  init_simd();
1065
1066
0
  if (DCTSIZE != 8)
1067
0
    return 0;
1068
0
  if (sizeof(JCOEF) != 2)
1069
0
    return 0;
1070
0
  if (simd_support & JSIMD_SSE2)
1071
0
    return 1;
1072
1073
0
  return 0;
1074
0
}
1075
1076
GLOBAL(void)
1077
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1078
                                  const int *jpeg_natural_order_start, int Sl,
1079
                                  int Al, UJCOEF *values, size_t *zerobits)
1080
0
{
1081
0
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1082
0
                                         Sl, Al, values, zerobits);
1083
0
}
1084
1085
GLOBAL(int)
1086
jsimd_can_encode_mcu_AC_refine_prepare(void)
1087
0
{
1088
0
  init_simd();
1089
1090
0
  if (DCTSIZE != 8)
1091
0
    return 0;
1092
0
  if (sizeof(JCOEF) != 2)
1093
0
    return 0;
1094
0
  if (simd_support & JSIMD_SSE2)
1095
0
    return 1;
1096
1097
0
  return 0;
1098
0
}
1099
1100
GLOBAL(int)
1101
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1102
                                   const int *jpeg_natural_order_start, int Sl,
1103
                                   int Al, UJCOEF *absvalues, size_t *bits)
1104
0
{
1105
0
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1106
0
                                                 jpeg_natural_order_start,
1107
0
                                                 Sl, Al, absvalues, bits);
1108
0
}