Coverage Report

Created: 2026-02-26 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.3.0.x/simd/x86_64/jsimd.c
Line
Count
Source
1
/*
2
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
3
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
4
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
5
 *
6
 * Based on the x86 SIMD extension for IJG JPEG library,
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
9
 *
10
 * This file contains the interface between the "normal" portions
11
 * of the library and the SIMD implementations when running on a
12
 * 64-bit x86 architecture.
13
 */
14
15
#define JPEG_INTERNALS
16
#include "../../jinclude.h"
17
#include "../../jpeglib.h"
18
#include "../../jsimd.h"
19
#include "../../jdct.h"
20
#include "../../jsimddct.h"
21
#include "../jsimd.h"
22
23
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address held by ptr is a
 * multiple of 2^order bytes.  Both arguments are fully parenthesized so that
 * an expression argument (e.g. "p + 4") is evaluated as pointer arithmetic
 * before the conversion to size_t, and the mask is built in size_t rather
 * than int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
31
32
/* Cached SIMD capability bits.  All bits set is the "not probed yet"
 * sentinel that init_simd() and the dispatch functions test for (~0U). */
static THREAD_LOCAL unsigned int simd_support = ~0U;
/* Non-zero allows jsimd_can_huff_encode_one_block() to report support;
 * init_simd() clears it when JSIMD_NOHUFFENC is "1". */
static THREAD_LOCAL unsigned int simd_huffman = 1;
34
35
/*
36
 * Check what SIMD accelerations are supported.
37
 */
38
LOCAL(void)
39
init_simd(void)
40
739k
{
41
739k
#ifndef NO_GETENV
42
739k
  char env[2] = { 0 };
43
739k
#endif
44
45
739k
  if (simd_support != ~0U)
46
739k
    return;
47
48
15
  simd_support = jpeg_simd_cpu_support();
49
50
15
#ifndef NO_GETENV
51
  /* Force different settings through environment variables */
52
15
  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
53
0
    simd_support &= JSIMD_SSE2;
54
15
  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
55
0
    simd_support &= JSIMD_AVX2;
56
15
  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
57
0
    simd_support = 0;
58
15
  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
59
0
    simd_huffman = 0;
60
15
#endif
61
15
}
62
63
GLOBAL(int)
64
jsimd_can_rgb_ycc(void)
65
31.6k
{
66
31.6k
  init_simd();
67
68
  /* The code is optimised for these values only */
69
31.6k
  if (BITS_IN_JSAMPLE != 8)
70
0
    return 0;
71
31.6k
  if (sizeof(JDIMENSION) != 4)
72
0
    return 0;
73
31.6k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
74
0
    return 0;
75
76
31.6k
  if ((simd_support & JSIMD_AVX2) &&
77
31.6k
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
78
31.6k
    return 1;
79
0
  if ((simd_support & JSIMD_SSE2) &&
80
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
81
0
    return 1;
82
83
0
  return 0;
84
0
}
85
86
GLOBAL(int)
87
jsimd_can_rgb_gray(void)
88
7.50k
{
89
7.50k
  init_simd();
90
91
  /* The code is optimised for these values only */
92
7.50k
  if (BITS_IN_JSAMPLE != 8)
93
0
    return 0;
94
7.50k
  if (sizeof(JDIMENSION) != 4)
95
0
    return 0;
96
7.50k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
97
0
    return 0;
98
99
7.50k
  if ((simd_support & JSIMD_AVX2) &&
100
7.50k
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
101
7.50k
    return 1;
102
0
  if ((simd_support & JSIMD_SSE2) &&
103
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
104
0
    return 1;
105
106
0
  return 0;
107
0
}
108
109
GLOBAL(int)
110
jsimd_can_ycc_rgb(void)
111
10.6k
{
112
10.6k
  init_simd();
113
114
  /* The code is optimised for these values only */
115
10.6k
  if (BITS_IN_JSAMPLE != 8)
116
0
    return 0;
117
10.6k
  if (sizeof(JDIMENSION) != 4)
118
0
    return 0;
119
10.6k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
120
0
    return 0;
121
122
10.6k
  if ((simd_support & JSIMD_AVX2) &&
123
10.6k
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
124
10.6k
    return 1;
125
0
  if ((simd_support & JSIMD_SSE2) &&
126
0
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
127
0
    return 1;
128
129
0
  return 0;
130
0
}
131
132
GLOBAL(int)
133
jsimd_can_ycc_rgb565(void)
134
1.97k
{
135
1.97k
  return 0;
136
1.97k
}
137
138
GLOBAL(void)
139
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
140
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
141
                      int num_rows)
142
98.6M
{
143
98.6M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
144
98.6M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
145
146
98.6M
  if (simd_support == ~0U)
147
0
    init_simd();
148
149
98.6M
  switch (cinfo->in_color_space) {
150
19.9M
  case JCS_EXT_RGB:
151
19.9M
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
152
19.9M
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
153
19.9M
    break;
154
5.91M
  case JCS_EXT_RGBX:
155
5.91M
  case JCS_EXT_RGBA:
156
5.91M
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
157
5.91M
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
158
5.91M
    break;
159
18.2M
  case JCS_EXT_BGR:
160
18.2M
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
161
18.2M
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
162
18.2M
    break;
163
0
  case JCS_EXT_BGRX:
164
11.8M
  case JCS_EXT_BGRA:
165
11.8M
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
166
11.8M
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
167
11.8M
    break;
168
12.8M
  case JCS_EXT_XBGR:
169
12.8M
  case JCS_EXT_ABGR:
170
12.8M
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
171
12.8M
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
172
12.8M
    break;
173
12.8M
  case JCS_EXT_XRGB:
174
12.8M
  case JCS_EXT_ARGB:
175
12.8M
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
176
12.8M
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
177
12.8M
    break;
178
16.9M
  default:
179
16.9M
    avx2fct = jsimd_rgb_ycc_convert_avx2;
180
16.9M
    sse2fct = jsimd_rgb_ycc_convert_sse2;
181
16.9M
    break;
182
98.6M
  }
183
184
98.6M
  if (simd_support & JSIMD_AVX2)
185
98.6M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
186
0
  else
187
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188
98.6M
}
189
190
GLOBAL(void)
191
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
192
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
193
                       int num_rows)
194
24.7M
{
195
24.7M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196
24.7M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
197
198
24.7M
  if (simd_support == ~0U)
199
0
    init_simd();
200
201
24.7M
  switch (cinfo->in_color_space) {
202
0
  case JCS_EXT_RGB:
203
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
204
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
205
0
    break;
206
0
  case JCS_EXT_RGBX:
207
0
  case JCS_EXT_RGBA:
208
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
209
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
210
0
    break;
211
12.8M
  case JCS_EXT_BGR:
212
12.8M
    avx2fct = jsimd_extbgr_gray_convert_avx2;
213
12.8M
    sse2fct = jsimd_extbgr_gray_convert_sse2;
214
12.8M
    break;
215
0
  case JCS_EXT_BGRX:
216
0
  case JCS_EXT_BGRA:
217
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
218
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
219
0
    break;
220
0
  case JCS_EXT_XBGR:
221
0
  case JCS_EXT_ABGR:
222
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
223
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
224
0
    break;
225
11.8M
  case JCS_EXT_XRGB:
226
11.8M
  case JCS_EXT_ARGB:
227
11.8M
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
228
11.8M
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
229
11.8M
    break;
230
0
  default:
231
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
232
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
233
0
    break;
234
24.7M
  }
235
236
24.7M
  if (simd_support & JSIMD_AVX2)
237
24.7M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
238
0
  else
239
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240
24.7M
}
241
242
GLOBAL(void)
243
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
244
                      JDIMENSION input_row, JSAMPARRAY output_buf,
245
                      int num_rows)
246
100M
{
247
100M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
248
100M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
249
250
100M
  if (simd_support == ~0U)
251
0
    init_simd();
252
253
100M
  switch (cinfo->out_color_space) {
254
2.00M
  case JCS_EXT_RGB:
255
2.00M
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
256
2.00M
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
257
2.00M
    break;
258
0
  case JCS_EXT_RGBX:
259
120k
  case JCS_EXT_RGBA:
260
120k
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
261
120k
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
262
120k
    break;
263
61.7M
  case JCS_EXT_BGR:
264
61.7M
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
265
61.7M
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
266
61.7M
    break;
267
1.36M
  case JCS_EXT_BGRX:
268
1.36M
  case JCS_EXT_BGRA:
269
1.36M
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
270
1.36M
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
271
1.36M
    break;
272
0
  case JCS_EXT_XBGR:
273
139k
  case JCS_EXT_ABGR:
274
139k
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
275
139k
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
276
139k
    break;
277
302k
  case JCS_EXT_XRGB:
278
302k
  case JCS_EXT_ARGB:
279
302k
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
280
302k
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
281
302k
    break;
282
34.8M
  default:
283
34.8M
    avx2fct = jsimd_ycc_rgb_convert_avx2;
284
34.8M
    sse2fct = jsimd_ycc_rgb_convert_sse2;
285
34.8M
    break;
286
100M
  }
287
288
100M
  if (simd_support & JSIMD_AVX2)
289
100M
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290
0
  else
291
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292
100M
}
293
294
GLOBAL(void)
295
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
296
                         JDIMENSION input_row, JSAMPARRAY output_buf,
297
                         int num_rows)
298
0
{
299
0
}
300
301
GLOBAL(int)
302
jsimd_can_h2v2_downsample(void)
303
15.0k
{
304
15.0k
  init_simd();
305
306
  /* The code is optimised for these values only */
307
15.0k
  if (BITS_IN_JSAMPLE != 8)
308
0
    return 0;
309
15.0k
  if (sizeof(JDIMENSION) != 4)
310
0
    return 0;
311
312
15.0k
  if (simd_support & JSIMD_AVX2)
313
15.0k
    return 1;
314
0
  if (simd_support & JSIMD_SSE2)
315
0
    return 1;
316
317
0
  return 0;
318
0
}
319
320
GLOBAL(int)
321
jsimd_can_h2v1_downsample(void)
322
17.6k
{
323
17.6k
  init_simd();
324
325
  /* The code is optimised for these values only */
326
17.6k
  if (BITS_IN_JSAMPLE != 8)
327
0
    return 0;
328
17.6k
  if (sizeof(JDIMENSION) != 4)
329
0
    return 0;
330
331
17.6k
  if (simd_support & JSIMD_AVX2)
332
17.6k
    return 1;
333
0
  if (simd_support & JSIMD_SSE2)
334
0
    return 1;
335
336
0
  return 0;
337
0
}
338
339
GLOBAL(void)
340
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
341
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
342
24.7M
{
343
24.7M
  if (simd_support == ~0U)
344
0
    init_simd();
345
346
24.7M
  if (simd_support & JSIMD_AVX2)
347
24.7M
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
348
24.7M
                               compptr->v_samp_factor,
349
24.7M
                               compptr->width_in_blocks, input_data,
350
24.7M
                               output_data);
351
0
  else
352
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
353
0
                               compptr->v_samp_factor,
354
0
                               compptr->width_in_blocks, input_data,
355
0
                               output_data);
356
24.7M
}
357
358
GLOBAL(void)
359
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
360
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
361
56.9M
{
362
56.9M
  if (simd_support == ~0U)
363
0
    init_simd();
364
365
56.9M
  if (simd_support & JSIMD_AVX2)
366
56.9M
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
367
56.9M
                               compptr->v_samp_factor,
368
56.9M
                               compptr->width_in_blocks, input_data,
369
56.9M
                               output_data);
370
0
  else
371
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
372
0
                               compptr->v_samp_factor,
373
0
                               compptr->width_in_blocks, input_data,
374
0
                               output_data);
375
56.9M
}
376
377
GLOBAL(int)
378
jsimd_can_h2v2_upsample(void)
379
5.79k
{
380
5.79k
  init_simd();
381
382
  /* The code is optimised for these values only */
383
5.79k
  if (BITS_IN_JSAMPLE != 8)
384
0
    return 0;
385
5.79k
  if (sizeof(JDIMENSION) != 4)
386
0
    return 0;
387
388
5.79k
  if (simd_support & JSIMD_AVX2)
389
5.79k
    return 1;
390
0
  if (simd_support & JSIMD_SSE2)
391
0
    return 1;
392
393
0
  return 0;
394
0
}
395
396
GLOBAL(int)
397
jsimd_can_h2v1_upsample(void)
398
2.94k
{
399
2.94k
  init_simd();
400
401
  /* The code is optimised for these values only */
402
2.94k
  if (BITS_IN_JSAMPLE != 8)
403
0
    return 0;
404
2.94k
  if (sizeof(JDIMENSION) != 4)
405
0
    return 0;
406
407
2.94k
  if (simd_support & JSIMD_AVX2)
408
2.94k
    return 1;
409
0
  if (simd_support & JSIMD_SSE2)
410
0
    return 1;
411
412
0
  return 0;
413
0
}
414
415
GLOBAL(void)
416
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
417
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
418
2.04M
{
419
2.04M
  if (simd_support == ~0U)
420
0
    init_simd();
421
422
2.04M
  if (simd_support & JSIMD_AVX2)
423
2.04M
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
424
2.04M
                             input_data, output_data_ptr);
425
0
  else
426
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
427
0
                             input_data, output_data_ptr);
428
2.04M
}
429
430
GLOBAL(void)
431
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
432
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
433
6.78M
{
434
6.78M
  if (simd_support == ~0U)
435
0
    init_simd();
436
437
6.78M
  if (simd_support & JSIMD_AVX2)
438
6.78M
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
439
6.78M
                             input_data, output_data_ptr);
440
0
  else
441
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
442
0
                             input_data, output_data_ptr);
443
6.78M
}
444
445
GLOBAL(int)
446
jsimd_can_h2v2_fancy_upsample(void)
447
5.04k
{
448
5.04k
  init_simd();
449
450
  /* The code is optimised for these values only */
451
5.04k
  if (BITS_IN_JSAMPLE != 8)
452
0
    return 0;
453
5.04k
  if (sizeof(JDIMENSION) != 4)
454
0
    return 0;
455
456
5.04k
  if ((simd_support & JSIMD_AVX2) &&
457
5.04k
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
458
5.04k
    return 1;
459
0
  if ((simd_support & JSIMD_SSE2) &&
460
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
461
0
    return 1;
462
463
0
  return 0;
464
0
}
465
466
GLOBAL(int)
467
jsimd_can_h2v1_fancy_upsample(void)
468
5.94k
{
469
5.94k
  init_simd();
470
471
  /* The code is optimised for these values only */
472
5.94k
  if (BITS_IN_JSAMPLE != 8)
473
0
    return 0;
474
5.94k
  if (sizeof(JDIMENSION) != 4)
475
0
    return 0;
476
477
5.94k
  if ((simd_support & JSIMD_AVX2) &&
478
5.94k
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
479
5.94k
    return 1;
480
0
  if ((simd_support & JSIMD_SSE2) &&
481
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
482
0
    return 1;
483
484
0
  return 0;
485
0
}
486
487
GLOBAL(void)
488
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
489
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
490
14.5M
{
491
14.5M
  if (simd_support == ~0U)
492
0
    init_simd();
493
494
14.5M
  if (simd_support & JSIMD_AVX2)
495
14.5M
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
496
14.5M
                                   compptr->downsampled_width, input_data,
497
14.5M
                                   output_data_ptr);
498
0
  else
499
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
500
0
                                   compptr->downsampled_width, input_data,
501
0
                                   output_data_ptr);
502
14.5M
}
503
504
GLOBAL(void)
505
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
506
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
507
139M
{
508
139M
  if (simd_support == ~0U)
509
0
    init_simd();
510
511
139M
  if (simd_support & JSIMD_AVX2)
512
139M
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
513
139M
                                   compptr->downsampled_width, input_data,
514
139M
                                   output_data_ptr);
515
0
  else
516
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
517
0
                                   compptr->downsampled_width, input_data,
518
0
                                   output_data_ptr);
519
139M
}
520
521
GLOBAL(int)
522
jsimd_can_h2v2_merged_upsample(void)
523
562
{
524
562
  init_simd();
525
526
  /* The code is optimised for these values only */
527
562
  if (BITS_IN_JSAMPLE != 8)
528
0
    return 0;
529
562
  if (sizeof(JDIMENSION) != 4)
530
0
    return 0;
531
532
562
  if ((simd_support & JSIMD_AVX2) &&
533
562
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
534
562
    return 1;
535
0
  if ((simd_support & JSIMD_SSE2) &&
536
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
537
0
    return 1;
538
539
0
  return 0;
540
0
}
541
542
GLOBAL(int)
543
jsimd_can_h2v1_merged_upsample(void)
544
163
{
545
163
  init_simd();
546
547
  /* The code is optimised for these values only */
548
163
  if (BITS_IN_JSAMPLE != 8)
549
0
    return 0;
550
163
  if (sizeof(JDIMENSION) != 4)
551
0
    return 0;
552
553
163
  if ((simd_support & JSIMD_AVX2) &&
554
163
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
555
163
    return 1;
556
0
  if ((simd_support & JSIMD_SSE2) &&
557
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
558
0
    return 1;
559
560
0
  return 0;
561
0
}
562
563
GLOBAL(void)
564
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
565
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
566
1.05M
{
567
1.05M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
568
1.05M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
569
570
1.05M
  if (simd_support == ~0U)
571
0
    init_simd();
572
573
1.05M
  switch (cinfo->out_color_space) {
574
416k
  case JCS_EXT_RGB:
575
416k
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
576
416k
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
577
416k
    break;
578
0
  case JCS_EXT_RGBX:
579
26.2k
  case JCS_EXT_RGBA:
580
26.2k
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
581
26.2k
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
582
26.2k
    break;
583
357k
  case JCS_EXT_BGR:
584
357k
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
585
357k
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
586
357k
    break;
587
0
  case JCS_EXT_BGRX:
588
0
  case JCS_EXT_BGRA:
589
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
590
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
591
0
    break;
592
0
  case JCS_EXT_XBGR:
593
0
  case JCS_EXT_ABGR:
594
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
595
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
596
0
    break;
597
250k
  case JCS_EXT_XRGB:
598
250k
  case JCS_EXT_ARGB:
599
250k
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
600
250k
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
601
250k
    break;
602
0
  default:
603
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
604
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
605
0
    break;
606
1.05M
  }
607
608
1.05M
  if (simd_support & JSIMD_AVX2)
609
1.05M
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
610
0
  else
611
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612
1.05M
}
613
614
GLOBAL(void)
615
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
616
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
617
966k
{
618
966k
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
619
966k
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
620
621
966k
  if (simd_support == ~0U)
622
0
    init_simd();
623
624
966k
  switch (cinfo->out_color_space) {
625
307k
  case JCS_EXT_RGB:
626
307k
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
627
307k
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
628
307k
    break;
629
0
  case JCS_EXT_RGBX:
630
77.4k
  case JCS_EXT_RGBA:
631
77.4k
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
632
77.4k
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
633
77.4k
    break;
634
361k
  case JCS_EXT_BGR:
635
361k
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
636
361k
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
637
361k
    break;
638
0
  case JCS_EXT_BGRX:
639
0
  case JCS_EXT_BGRA:
640
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
641
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
642
0
    break;
643
0
  case JCS_EXT_XBGR:
644
0
  case JCS_EXT_ABGR:
645
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
646
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
647
0
    break;
648
219k
  case JCS_EXT_XRGB:
649
219k
  case JCS_EXT_ARGB:
650
219k
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
651
219k
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
652
219k
    break;
653
0
  default:
654
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
655
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
656
0
    break;
657
966k
  }
658
659
966k
  if (simd_support & JSIMD_AVX2)
660
966k
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
661
0
  else
662
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663
966k
}
664
665
GLOBAL(int)
666
jsimd_can_convsamp(void)
667
48.4k
{
668
48.4k
  init_simd();
669
670
  /* The code is optimised for these values only */
671
48.4k
  if (DCTSIZE != 8)
672
0
    return 0;
673
48.4k
  if (BITS_IN_JSAMPLE != 8)
674
0
    return 0;
675
48.4k
  if (sizeof(JDIMENSION) != 4)
676
0
    return 0;
677
48.4k
  if (sizeof(DCTELEM) != 2)
678
0
    return 0;
679
680
48.4k
  if (simd_support & JSIMD_AVX2)
681
48.4k
    return 1;
682
0
  if (simd_support & JSIMD_SSE2)
683
0
    return 1;
684
685
0
  return 0;
686
0
}
687
688
GLOBAL(int)
689
jsimd_can_convsamp_float(void)
690
6.49k
{
691
6.49k
  init_simd();
692
693
  /* The code is optimised for these values only */
694
6.49k
  if (DCTSIZE != 8)
695
0
    return 0;
696
6.49k
  if (BITS_IN_JSAMPLE != 8)
697
0
    return 0;
698
6.49k
  if (sizeof(JDIMENSION) != 4)
699
0
    return 0;
700
6.49k
  if (sizeof(FAST_FLOAT) != 4)
701
0
    return 0;
702
703
6.49k
  if (simd_support & JSIMD_SSE2)
704
6.49k
    return 1;
705
706
0
  return 0;
707
6.49k
}
708
709
GLOBAL(void)
710
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
711
               DCTELEM *workspace)
712
72.3M
{
713
72.3M
  if (simd_support == ~0U)
714
0
    init_simd();
715
716
72.3M
  if (simd_support & JSIMD_AVX2)
717
72.3M
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
718
0
  else
719
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
720
72.3M
}
721
722
GLOBAL(void)
723
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
724
                     FAST_FLOAT *workspace)
725
15.5M
{
726
15.5M
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
727
15.5M
}
728
729
GLOBAL(int)
730
jsimd_can_fdct_islow(void)
731
39.2k
{
732
39.2k
  init_simd();
733
734
  /* The code is optimised for these values only */
735
39.2k
  if (DCTSIZE != 8)
736
0
    return 0;
737
39.2k
  if (sizeof(DCTELEM) != 2)
738
0
    return 0;
739
740
39.2k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
741
39.2k
    return 1;
742
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
743
0
    return 1;
744
745
0
  return 0;
746
0
}
747
748
GLOBAL(int)
749
jsimd_can_fdct_ifast(void)
750
9.18k
{
751
9.18k
  init_simd();
752
753
  /* The code is optimised for these values only */
754
9.18k
  if (DCTSIZE != 8)
755
0
    return 0;
756
9.18k
  if (sizeof(DCTELEM) != 2)
757
0
    return 0;
758
759
9.18k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
760
9.18k
    return 1;
761
762
0
  return 0;
763
9.18k
}
764
765
GLOBAL(int)
766
jsimd_can_fdct_float(void)
767
6.49k
{
768
6.49k
  init_simd();
769
770
  /* The code is optimised for these values only */
771
6.49k
  if (DCTSIZE != 8)
772
0
    return 0;
773
6.49k
  if (sizeof(FAST_FLOAT) != 4)
774
0
    return 0;
775
776
6.49k
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
777
6.49k
    return 1;
778
779
0
  return 0;
780
6.49k
}
781
782
GLOBAL(void)
783
jsimd_fdct_islow(DCTELEM *data)
784
57.1M
{
785
57.1M
  if (simd_support == ~0U)
786
0
    init_simd();
787
788
57.1M
  if (simd_support & JSIMD_AVX2)
789
57.1M
    jsimd_fdct_islow_avx2(data);
790
0
  else
791
0
    jsimd_fdct_islow_sse2(data);
792
57.1M
}
793
794
GLOBAL(void)
795
jsimd_fdct_ifast(DCTELEM *data)
796
15.2M
{
797
15.2M
  jsimd_fdct_ifast_sse2(data);
798
15.2M
}
799
800
GLOBAL(void)
801
jsimd_fdct_float(FAST_FLOAT *data)
802
15.5M
{
803
15.5M
  jsimd_fdct_float_sse(data);
804
15.5M
}
805
806
GLOBAL(int)
807
jsimd_can_quantize(void)
808
48.4k
{
809
48.4k
  init_simd();
810
811
  /* The code is optimised for these values only */
812
48.4k
  if (DCTSIZE != 8)
813
0
    return 0;
814
48.4k
  if (sizeof(JCOEF) != 2)
815
0
    return 0;
816
48.4k
  if (sizeof(DCTELEM) != 2)
817
0
    return 0;
818
819
48.4k
  if (simd_support & JSIMD_AVX2)
820
48.4k
    return 1;
821
0
  if (simd_support & JSIMD_SSE2)
822
0
    return 1;
823
824
0
  return 0;
825
0
}
826
827
GLOBAL(int)
828
jsimd_can_quantize_float(void)
829
6.49k
{
830
6.49k
  init_simd();
831
832
  /* The code is optimised for these values only */
833
6.49k
  if (DCTSIZE != 8)
834
0
    return 0;
835
6.49k
  if (sizeof(JCOEF) != 2)
836
0
    return 0;
837
6.49k
  if (sizeof(FAST_FLOAT) != 4)
838
0
    return 0;
839
840
6.49k
  if (simd_support & JSIMD_SSE2)
841
6.49k
    return 1;
842
843
0
  return 0;
844
6.49k
}
845
846
GLOBAL(void)
847
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
848
68.1M
{
849
68.1M
  if (simd_support == ~0U)
850
0
    init_simd();
851
852
68.1M
  if (simd_support & JSIMD_AVX2)
853
68.1M
    jsimd_quantize_avx2(coef_block, divisors, workspace);
854
0
  else
855
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
856
68.1M
}
857
858
GLOBAL(void)
859
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
860
                     FAST_FLOAT *workspace)
861
15.5M
{
862
15.5M
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
863
15.5M
}
864
865
GLOBAL(int)
866
jsimd_can_idct_2x2(void)
867
2.14k
{
868
2.14k
  init_simd();
869
870
  /* The code is optimised for these values only */
871
2.14k
  if (DCTSIZE != 8)
872
0
    return 0;
873
2.14k
  if (sizeof(JCOEF) != 2)
874
0
    return 0;
875
2.14k
  if (BITS_IN_JSAMPLE != 8)
876
0
    return 0;
877
2.14k
  if (sizeof(JDIMENSION) != 4)
878
0
    return 0;
879
2.14k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
880
0
    return 0;
881
882
2.14k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
883
2.14k
    return 1;
884
885
0
  return 0;
886
2.14k
}
887
888
GLOBAL(int)
889
jsimd_can_idct_4x4(void)
890
4.41k
{
891
4.41k
  init_simd();
892
893
  /* The code is optimised for these values only */
894
4.41k
  if (DCTSIZE != 8)
895
0
    return 0;
896
4.41k
  if (sizeof(JCOEF) != 2)
897
0
    return 0;
898
4.41k
  if (BITS_IN_JSAMPLE != 8)
899
0
    return 0;
900
4.41k
  if (sizeof(JDIMENSION) != 4)
901
0
    return 0;
902
4.41k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
903
0
    return 0;
904
905
4.41k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
906
4.41k
    return 1;
907
908
0
  return 0;
909
4.41k
}
910
911
GLOBAL(void)
912
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
913
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
914
               JDIMENSION output_col)
915
26.2M
{
916
26.2M
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
917
26.2M
}
918
919
GLOBAL(void)
920
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
921
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
922
               JDIMENSION output_col)
923
26.2M
{
924
26.2M
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
925
26.2M
}
926
927
GLOBAL(int)
928
jsimd_can_idct_islow(void)
929
12.5k
{
930
12.5k
  init_simd();
931
932
  /* The code is optimised for these values only */
933
12.5k
  if (DCTSIZE != 8)
934
0
    return 0;
935
12.5k
  if (sizeof(JCOEF) != 2)
936
0
    return 0;
937
12.5k
  if (BITS_IN_JSAMPLE != 8)
938
0
    return 0;
939
12.5k
  if (sizeof(JDIMENSION) != 4)
940
0
    return 0;
941
12.5k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
942
0
    return 0;
943
944
12.5k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
945
12.5k
    return 1;
946
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
947
0
    return 1;
948
949
0
  return 0;
950
0
}
951
952
GLOBAL(int)
953
jsimd_can_idct_ifast(void)
954
10.4k
{
955
10.4k
  init_simd();
956
957
  /* The code is optimised for these values only */
958
10.4k
  if (DCTSIZE != 8)
959
0
    return 0;
960
10.4k
  if (sizeof(JCOEF) != 2)
961
0
    return 0;
962
10.4k
  if (BITS_IN_JSAMPLE != 8)
963
0
    return 0;
964
10.4k
  if (sizeof(JDIMENSION) != 4)
965
0
    return 0;
966
10.4k
  if (sizeof(IFAST_MULT_TYPE) != 2)
967
0
    return 0;
968
10.4k
  if (IFAST_SCALE_BITS != 2)
969
0
    return 0;
970
971
10.4k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
972
10.4k
    return 1;
973
974
0
  return 0;
975
10.4k
}
976
977
GLOBAL(int)
978
jsimd_can_idct_float(void)
979
84.3k
{
980
84.3k
  init_simd();
981
982
84.3k
  if (DCTSIZE != 8)
983
0
    return 0;
984
84.3k
  if (sizeof(JCOEF) != 2)
985
0
    return 0;
986
84.3k
  if (BITS_IN_JSAMPLE != 8)
987
0
    return 0;
988
84.3k
  if (sizeof(JDIMENSION) != 4)
989
0
    return 0;
990
84.3k
  if (sizeof(FAST_FLOAT) != 4)
991
0
    return 0;
992
84.3k
  if (sizeof(FLOAT_MULT_TYPE) != 4)
993
0
    return 0;
994
995
84.3k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
996
84.3k
    return 1;
997
998
0
  return 0;
999
84.3k
}
1000
1001
GLOBAL(void)
1002
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1003
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1004
                 JDIMENSION output_col)
1005
40.1M
{
1006
40.1M
  if (simd_support == ~0U)
1007
0
    init_simd();
1008
1009
40.1M
  if (simd_support & JSIMD_AVX2)
1010
40.1M
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1011
40.1M
                          output_col);
1012
0
  else
1013
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1014
0
                          output_col);
1015
40.1M
}
1016
1017
GLOBAL(void)
1018
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1019
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1020
                 JDIMENSION output_col)
1021
73.6M
{
1022
73.6M
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1023
73.6M
                        output_col);
1024
73.6M
}
1025
1026
GLOBAL(void)
1027
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1028
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1029
                 JDIMENSION output_col)
1030
180M
{
1031
180M
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1032
180M
                        output_col);
1033
180M
}
1034
1035
GLOBAL(int)
1036
jsimd_can_huff_encode_one_block(void)
1037
80.3k
{
1038
80.3k
  init_simd();
1039
1040
80.3k
  if (DCTSIZE != 8)
1041
0
    return 0;
1042
80.3k
  if (sizeof(JCOEF) != 2)
1043
0
    return 0;
1044
1045
80.3k
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1046
80.3k
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1047
80.3k
    return 1;
1048
1049
0
  return 0;
1050
80.3k
}
1051
1052
/*
 * Forward a Huffman block-encoding request to the SSE2 routine and return
 * whatever pointer that routine returns.  All arguments are passed through
 * unchanged; no capability check is made here.
 */
GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
                            int last_dc_val, c_derived_tbl *dctbl,
                            c_derived_tbl *actbl)
{
  return jsimd_huff_encode_one_block_sse2(state, buffer, block,
                                          last_dc_val, dctbl, actbl);
}
1060
1061
GLOBAL(int)
1062
jsimd_can_encode_mcu_AC_first_prepare(void)
1063
85.8k
{
1064
85.8k
  init_simd();
1065
1066
85.8k
  if (DCTSIZE != 8)
1067
0
    return 0;
1068
85.8k
  if (sizeof(JCOEF) != 2)
1069
0
    return 0;
1070
85.8k
  if (simd_support & JSIMD_SSE2)
1071
85.8k
    return 1;
1072
1073
0
  return 0;
1074
85.8k
}
1075
1076
GLOBAL(void)
1077
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1078
                                  const int *jpeg_natural_order_start, int Sl,
1079
                                  int Al, UJCOEF *values, size_t *zerobits)
1080
253M
{
1081
253M
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1082
253M
                                         Sl, Al, values, zerobits);
1083
253M
}
1084
1085
GLOBAL(int)
1086
jsimd_can_encode_mcu_AC_refine_prepare(void)
1087
65.3k
{
1088
65.3k
  init_simd();
1089
1090
65.3k
  if (DCTSIZE != 8)
1091
0
    return 0;
1092
65.3k
  if (sizeof(JCOEF) != 2)
1093
0
    return 0;
1094
65.3k
  if (simd_support & JSIMD_SSE2)
1095
65.3k
    return 1;
1096
1097
0
  return 0;
1098
65.3k
}
1099
1100
GLOBAL(int)
1101
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1102
                                   const int *jpeg_natural_order_start, int Sl,
1103
                                   int Al, UJCOEF *absvalues, size_t *bits)
1104
249M
{
1105
249M
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1106
249M
                                                 jpeg_natural_order_start,
1107
249M
                                                 Sl, Al, absvalues, bits);
1108
249M
}