Coverage Report

Created: 2023-06-07 06:03

/src/libjpeg-turbo.2.0.x/simd/x86_64/jsimd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jsimd_x86_64.c
3
 *
4
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
6
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7
 *
8
 * Based on the x86 SIMD extension for IJG JPEG library,
9
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11
 *
12
 * This file contains the interface between the "normal" portions
13
 * of the library and the SIMD implementations when running on a
14
 * 64-bit x86 architecture.
15
 */
16
17
#define JPEG_INTERNALS
18
#include "../../jinclude.h"
19
#include "../../jpeglib.h"
20
#include "../../jsimd.h"
21
#include "../../jdct.h"
22
#include "../../jsimddct.h"
23
#include "../jsimd.h"
24
#include "jconfigint.h"
25
26
/*
27
 * In the PIC cases, we have no guarantee that constants will keep
28
 * their alignment. This macro allows us to verify it at runtime.
29
 */
30
241k
#define IS_ALIGNED(ptr, order)  (((size_t)ptr & ((1 << order) - 1)) == 0)
31
32
149k
#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
33
91.8k
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
34
35
static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
36
static THREAD_LOCAL unsigned int simd_huffman = 1;
37
38
/*
39
 * Check what SIMD accelerations are supported.
40
 */
41
LOCAL(void)
42
init_simd(void)
43
217k
{
44
217k
#ifndef NO_GETENV
45
217k
  char *env = NULL;
46
217k
#endif
47
48
217k
  if (simd_support != ~0U)
49
217k
    return;
50
51
6
  simd_support = jpeg_simd_cpu_support();
52
53
6
#ifndef NO_GETENV
54
  /* Force different settings through environment variables */
55
6
  env = getenv("JSIMD_FORCESSE2");
56
6
  if ((env != NULL) && (strcmp(env, "1") == 0))
57
0
    simd_support &= JSIMD_SSE2;
58
6
  env = getenv("JSIMD_FORCEAVX2");
59
6
  if ((env != NULL) && (strcmp(env, "1") == 0))
60
0
    simd_support &= JSIMD_AVX2;
61
6
  env = getenv("JSIMD_FORCENONE");
62
6
  if ((env != NULL) && (strcmp(env, "1") == 0))
63
0
    simd_support = 0;
64
6
  env = getenv("JSIMD_NOHUFFENC");
65
6
  if ((env != NULL) && (strcmp(env, "1") == 0))
66
0
    simd_huffman = 0;
67
6
#endif
68
6
}
69
70
GLOBAL(int)
71
jsimd_can_rgb_ycc(void)
72
42.0k
{
73
42.0k
  init_simd();
74
75
  /* The code is optimised for these values only */
76
42.0k
  if (BITS_IN_JSAMPLE != 8)
77
0
    return 0;
78
42.0k
  if (sizeof(JDIMENSION) != 4)
79
0
    return 0;
80
42.0k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
81
0
    return 0;
82
83
42.0k
  if ((simd_support & JSIMD_AVX2) &&
84
42.0k
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
85
42.0k
    return 1;
86
0
  if ((simd_support & JSIMD_SSE2) &&
87
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
88
0
    return 1;
89
90
0
  return 0;
91
0
}
92
93
GLOBAL(int)
94
jsimd_can_rgb_gray(void)
95
9.53k
{
96
9.53k
  init_simd();
97
98
  /* The code is optimised for these values only */
99
9.53k
  if (BITS_IN_JSAMPLE != 8)
100
0
    return 0;
101
9.53k
  if (sizeof(JDIMENSION) != 4)
102
0
    return 0;
103
9.53k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
104
0
    return 0;
105
106
9.53k
  if ((simd_support & JSIMD_AVX2) &&
107
9.53k
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
108
9.53k
    return 1;
109
0
  if ((simd_support & JSIMD_SSE2) &&
110
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
111
0
    return 1;
112
113
0
  return 0;
114
0
}
115
116
GLOBAL(int)
117
jsimd_can_ycc_rgb(void)
118
4.17k
{
119
4.17k
  init_simd();
120
121
  /* The code is optimised for these values only */
122
4.17k
  if (BITS_IN_JSAMPLE != 8)
123
0
    return 0;
124
4.17k
  if (sizeof(JDIMENSION) != 4)
125
0
    return 0;
126
4.17k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
127
0
    return 0;
128
129
4.17k
  if ((simd_support & JSIMD_AVX2) &&
130
4.17k
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
131
4.17k
    return 1;
132
0
  if ((simd_support & JSIMD_SSE2) &&
133
0
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
134
0
    return 1;
135
136
0
  return 0;
137
0
}
138
139
GLOBAL(int)
140
jsimd_can_ycc_rgb565(void)
141
0
{
142
0
  return 0;
143
0
}
144
145
GLOBAL(void)
146
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
147
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
148
                      int num_rows)
149
115M
{
150
115M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
151
115M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
152
153
115M
  if (simd_support == ~0U)
154
0
    init_simd();
155
156
115M
  switch (cinfo->in_color_space) {
157
29.5M
  case JCS_EXT_RGB:
158
29.5M
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
159
29.5M
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
160
29.5M
    break;
161
7.00M
  case JCS_EXT_RGBX:
162
7.00M
  case JCS_EXT_RGBA:
163
7.00M
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
164
7.00M
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
165
7.00M
    break;
166
21.7M
  case JCS_EXT_BGR:
167
21.7M
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
168
21.7M
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
169
21.7M
    break;
170
0
  case JCS_EXT_BGRX:
171
14.0M
  case JCS_EXT_BGRA:
172
14.0M
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
173
14.0M
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
174
14.0M
    break;
175
15.5M
  case JCS_EXT_XBGR:
176
15.5M
  case JCS_EXT_ABGR:
177
15.5M
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
178
15.5M
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
179
15.5M
    break;
180
15.5M
  case JCS_EXT_XRGB:
181
15.5M
  case JCS_EXT_ARGB:
182
15.5M
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
183
15.5M
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
184
15.5M
    break;
185
12.1M
  default:
186
12.1M
    avx2fct = jsimd_rgb_ycc_convert_avx2;
187
12.1M
    sse2fct = jsimd_rgb_ycc_convert_sse2;
188
12.1M
    break;
189
115M
  }
190
191
115M
  if (simd_support & JSIMD_AVX2)
192
115M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
193
0
  else
194
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
195
115M
}
196
197
GLOBAL(void)
198
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
199
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
200
                       int num_rows)
201
29.5M
{
202
29.5M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
203
29.5M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
204
205
29.5M
  if (simd_support == ~0U)
206
0
    init_simd();
207
208
29.5M
  switch (cinfo->in_color_space) {
209
0
  case JCS_EXT_RGB:
210
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
211
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
212
0
    break;
213
0
  case JCS_EXT_RGBX:
214
0
  case JCS_EXT_RGBA:
215
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
216
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
217
0
    break;
218
15.5M
  case JCS_EXT_BGR:
219
15.5M
    avx2fct = jsimd_extbgr_gray_convert_avx2;
220
15.5M
    sse2fct = jsimd_extbgr_gray_convert_sse2;
221
15.5M
    break;
222
0
  case JCS_EXT_BGRX:
223
0
  case JCS_EXT_BGRA:
224
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
225
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
226
0
    break;
227
0
  case JCS_EXT_XBGR:
228
0
  case JCS_EXT_ABGR:
229
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
230
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
231
0
    break;
232
14.0M
  case JCS_EXT_XRGB:
233
14.0M
  case JCS_EXT_ARGB:
234
14.0M
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
235
14.0M
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
236
14.0M
    break;
237
0
  default:
238
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
239
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
240
0
    break;
241
29.5M
  }
242
243
29.5M
  if (simd_support & JSIMD_AVX2)
244
29.5M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
245
0
  else
246
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
247
29.5M
}
248
249
GLOBAL(void)
250
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
251
                      JDIMENSION input_row, JSAMPARRAY output_buf,
252
                      int num_rows)
253
5.06M
{
254
5.06M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
255
5.06M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
256
257
5.06M
  if (simd_support == ~0U)
258
0
    init_simd();
259
260
5.06M
  switch (cinfo->out_color_space) {
261
3.51M
  case JCS_EXT_RGB:
262
3.51M
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
263
3.51M
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
264
3.51M
    break;
265
0
  case JCS_EXT_RGBX:
266
0
  case JCS_EXT_RGBA:
267
0
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
268
0
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
269
0
    break;
270
892k
  case JCS_EXT_BGR:
271
892k
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
272
892k
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
273
892k
    break;
274
370k
  case JCS_EXT_BGRX:
275
370k
  case JCS_EXT_BGRA:
276
370k
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
277
370k
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
278
370k
    break;
279
0
  case JCS_EXT_XBGR:
280
0
  case JCS_EXT_ABGR:
281
0
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
282
0
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
283
0
    break;
284
288k
  case JCS_EXT_XRGB:
285
288k
  case JCS_EXT_ARGB:
286
288k
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
287
288k
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
288
288k
    break;
289
0
  default:
290
0
    avx2fct = jsimd_ycc_rgb_convert_avx2;
291
0
    sse2fct = jsimd_ycc_rgb_convert_sse2;
292
0
    break;
293
5.06M
  }
294
295
5.06M
  if (simd_support & JSIMD_AVX2)
296
5.06M
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
297
0
  else
298
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
299
5.06M
}
300
301
GLOBAL(void)
302
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
303
                         JDIMENSION input_row, JSAMPARRAY output_buf,
304
                         int num_rows)
305
0
{
306
0
}
307
308
GLOBAL(int)
309
jsimd_can_h2v2_downsample(void)
310
19.0k
{
311
19.0k
  init_simd();
312
313
  /* The code is optimised for these values only */
314
19.0k
  if (BITS_IN_JSAMPLE != 8)
315
0
    return 0;
316
19.0k
  if (sizeof(JDIMENSION) != 4)
317
0
    return 0;
318
319
19.0k
  if (simd_support & JSIMD_AVX2)
320
19.0k
    return 1;
321
0
  if (simd_support & JSIMD_SSE2)
322
0
    return 1;
323
324
0
  return 0;
325
0
}
326
327
GLOBAL(int)
328
jsimd_can_h2v1_downsample(void)
329
22.9k
{
330
22.9k
  init_simd();
331
332
  /* The code is optimised for these values only */
333
22.9k
  if (BITS_IN_JSAMPLE != 8)
334
0
    return 0;
335
22.9k
  if (sizeof(JDIMENSION) != 4)
336
0
    return 0;
337
338
22.9k
  if (simd_support & JSIMD_AVX2)
339
22.9k
    return 1;
340
0
  if (simd_support & JSIMD_SSE2)
341
0
    return 1;
342
343
0
  return 0;
344
0
}
345
346
GLOBAL(void)
347
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
348
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
349
29.5M
{
350
29.5M
  if (simd_support == ~0U)
351
0
    init_simd();
352
353
29.5M
  if (simd_support & JSIMD_AVX2)
354
29.5M
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
355
29.5M
                               compptr->v_samp_factor,
356
29.5M
                               compptr->width_in_blocks, input_data,
357
29.5M
                               output_data);
358
0
  else
359
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
360
0
                               compptr->v_samp_factor,
361
0
                               compptr->width_in_blocks, input_data,
362
0
                               output_data);
363
29.5M
}
364
365
GLOBAL(void)
366
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
367
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
368
65.1M
{
369
65.1M
  if (simd_support == ~0U)
370
0
    init_simd();
371
372
65.1M
  if (simd_support & JSIMD_AVX2)
373
65.1M
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
374
65.1M
                               compptr->v_samp_factor,
375
65.1M
                               compptr->width_in_blocks, input_data,
376
65.1M
                               output_data);
377
0
  else
378
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
379
0
                               compptr->v_samp_factor,
380
0
                               compptr->width_in_blocks, input_data,
381
0
                               output_data);
382
65.1M
}
383
384
GLOBAL(int)
385
jsimd_can_h2v2_upsample(void)
386
1.71k
{
387
1.71k
  init_simd();
388
389
  /* The code is optimised for these values only */
390
1.71k
  if (BITS_IN_JSAMPLE != 8)
391
0
    return 0;
392
1.71k
  if (sizeof(JDIMENSION) != 4)
393
0
    return 0;
394
395
1.71k
  if (simd_support & JSIMD_AVX2)
396
1.71k
    return 1;
397
0
  if (simd_support & JSIMD_SSE2)
398
0
    return 1;
399
400
0
  return 0;
401
0
}
402
403
GLOBAL(int)
404
jsimd_can_h2v1_upsample(void)
405
2.19k
{
406
2.19k
  init_simd();
407
408
  /* The code is optimised for these values only */
409
2.19k
  if (BITS_IN_JSAMPLE != 8)
410
0
    return 0;
411
2.19k
  if (sizeof(JDIMENSION) != 4)
412
0
    return 0;
413
414
2.19k
  if (simd_support & JSIMD_AVX2)
415
2.19k
    return 1;
416
0
  if (simd_support & JSIMD_SSE2)
417
0
    return 1;
418
419
0
  return 0;
420
0
}
421
422
GLOBAL(void)
423
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
424
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
425
1.25M
{
426
1.25M
  if (simd_support == ~0U)
427
0
    init_simd();
428
429
1.25M
  if (simd_support & JSIMD_AVX2)
430
1.25M
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
431
1.25M
                             input_data, output_data_ptr);
432
0
  else
433
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
434
0
                             input_data, output_data_ptr);
435
1.25M
}
436
437
GLOBAL(void)
438
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
439
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
440
3.29M
{
441
3.29M
  if (simd_support == ~0U)
442
0
    init_simd();
443
444
3.29M
  if (simd_support & JSIMD_AVX2)
445
3.29M
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
446
3.29M
                             input_data, output_data_ptr);
447
0
  else
448
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
449
0
                             input_data, output_data_ptr);
450
3.29M
}
451
452
GLOBAL(int)
453
jsimd_can_h2v2_fancy_upsample(void)
454
121
{
455
121
  init_simd();
456
457
  /* The code is optimised for these values only */
458
121
  if (BITS_IN_JSAMPLE != 8)
459
0
    return 0;
460
121
  if (sizeof(JDIMENSION) != 4)
461
0
    return 0;
462
463
121
  if ((simd_support & JSIMD_AVX2) &&
464
121
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
465
121
    return 1;
466
0
  if ((simd_support & JSIMD_SSE2) &&
467
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
468
0
    return 1;
469
470
0
  return 0;
471
0
}
472
473
GLOBAL(int)
474
jsimd_can_h2v1_fancy_upsample(void)
475
356
{
476
356
  init_simd();
477
478
  /* The code is optimised for these values only */
479
356
  if (BITS_IN_JSAMPLE != 8)
480
0
    return 0;
481
356
  if (sizeof(JDIMENSION) != 4)
482
0
    return 0;
483
484
356
  if ((simd_support & JSIMD_AVX2) &&
485
356
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
486
356
    return 1;
487
0
  if ((simd_support & JSIMD_SSE2) &&
488
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
489
0
    return 1;
490
491
0
  return 0;
492
0
}
493
494
GLOBAL(void)
495
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
496
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
497
75.2k
{
498
75.2k
  if (simd_support == ~0U)
499
0
    init_simd();
500
501
75.2k
  if (simd_support & JSIMD_AVX2)
502
75.2k
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
503
75.2k
                                   compptr->downsampled_width, input_data,
504
75.2k
                                   output_data_ptr);
505
0
  else
506
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
507
0
                                   compptr->downsampled_width, input_data,
508
0
                                   output_data_ptr);
509
75.2k
}
510
511
GLOBAL(void)
512
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
513
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
514
171k
{
515
171k
  if (simd_support == ~0U)
516
0
    init_simd();
517
518
171k
  if (simd_support & JSIMD_AVX2)
519
171k
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
520
171k
                                   compptr->downsampled_width, input_data,
521
171k
                                   output_data_ptr);
522
0
  else
523
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
524
0
                                   compptr->downsampled_width, input_data,
525
0
                                   output_data_ptr);
526
171k
}
527
528
GLOBAL(int)
529
jsimd_can_h2v2_merged_upsample(void)
530
3.70k
{
531
3.70k
  init_simd();
532
533
  /* The code is optimised for these values only */
534
3.70k
  if (BITS_IN_JSAMPLE != 8)
535
0
    return 0;
536
3.70k
  if (sizeof(JDIMENSION) != 4)
537
0
    return 0;
538
539
3.70k
  if ((simd_support & JSIMD_AVX2) &&
540
3.70k
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
541
3.70k
    return 1;
542
0
  if ((simd_support & JSIMD_SSE2) &&
543
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
544
0
    return 1;
545
546
0
  return 0;
547
0
}
548
549
GLOBAL(int)
550
jsimd_can_h2v1_merged_upsample(void)
551
329
{
552
329
  init_simd();
553
554
  /* The code is optimised for these values only */
555
329
  if (BITS_IN_JSAMPLE != 8)
556
0
    return 0;
557
329
  if (sizeof(JDIMENSION) != 4)
558
0
    return 0;
559
560
329
  if ((simd_support & JSIMD_AVX2) &&
561
329
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
562
329
    return 1;
563
0
  if ((simd_support & JSIMD_SSE2) &&
564
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
565
0
    return 1;
566
567
0
  return 0;
568
0
}
569
570
GLOBAL(void)
571
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
572
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
573
1.26M
{
574
1.26M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
575
1.26M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
576
577
1.26M
  if (simd_support == ~0U)
578
0
    init_simd();
579
580
1.26M
  switch (cinfo->out_color_space) {
581
683k
  case JCS_EXT_RGB:
582
683k
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
583
683k
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
584
683k
    break;
585
0
  case JCS_EXT_RGBX:
586
0
  case JCS_EXT_RGBA:
587
0
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
588
0
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
589
0
    break;
590
423k
  case JCS_EXT_BGR:
591
423k
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
592
423k
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
593
423k
    break;
594
0
  case JCS_EXT_BGRX:
595
0
  case JCS_EXT_BGRA:
596
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
597
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
598
0
    break;
599
0
  case JCS_EXT_XBGR:
600
0
  case JCS_EXT_ABGR:
601
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
602
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
603
0
    break;
604
154k
  case JCS_EXT_XRGB:
605
154k
  case JCS_EXT_ARGB:
606
154k
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
607
154k
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
608
154k
    break;
609
0
  default:
610
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
611
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
612
0
    break;
613
1.26M
  }
614
615
1.26M
  if (simd_support & JSIMD_AVX2)
616
1.26M
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
617
0
  else
618
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
619
1.26M
}
620
621
GLOBAL(void)
622
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
624
957k
{
625
957k
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
626
957k
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
627
628
957k
  if (simd_support == ~0U)
629
0
    init_simd();
630
631
957k
  switch (cinfo->out_color_space) {
632
453k
  case JCS_EXT_RGB:
633
453k
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
634
453k
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
635
453k
    break;
636
0
  case JCS_EXT_RGBX:
637
0
  case JCS_EXT_RGBA:
638
0
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
639
0
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
640
0
    break;
641
394k
  case JCS_EXT_BGR:
642
394k
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
643
394k
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
644
394k
    break;
645
0
  case JCS_EXT_BGRX:
646
0
  case JCS_EXT_BGRA:
647
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
648
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
649
0
    break;
650
0
  case JCS_EXT_XBGR:
651
0
  case JCS_EXT_ABGR:
652
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
653
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
654
0
    break;
655
109k
  case JCS_EXT_XRGB:
656
109k
  case JCS_EXT_ARGB:
657
109k
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
658
109k
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
659
109k
    break;
660
0
  default:
661
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
662
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
663
0
    break;
664
957k
  }
665
666
957k
  if (simd_support & JSIMD_AVX2)
667
957k
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
668
0
  else
669
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
670
957k
}
671
672
GLOBAL(int)
673
jsimd_can_convsamp(void)
674
59.6k
{
675
59.6k
  init_simd();
676
677
  /* The code is optimised for these values only */
678
59.6k
  if (DCTSIZE != 8)
679
0
    return 0;
680
59.6k
  if (BITS_IN_JSAMPLE != 8)
681
0
    return 0;
682
59.6k
  if (sizeof(JDIMENSION) != 4)
683
0
    return 0;
684
59.6k
  if (sizeof(DCTELEM) != 2)
685
0
    return 0;
686
687
59.6k
  if (simd_support & JSIMD_AVX2)
688
59.6k
    return 1;
689
0
  if (simd_support & JSIMD_SSE2)
690
0
    return 1;
691
692
0
  return 0;
693
0
}
694
695
GLOBAL(int)
696
jsimd_can_convsamp_float(void)
697
9.17k
{
698
9.17k
  init_simd();
699
700
  /* The code is optimised for these values only */
701
9.17k
  if (DCTSIZE != 8)
702
0
    return 0;
703
9.17k
  if (BITS_IN_JSAMPLE != 8)
704
0
    return 0;
705
9.17k
  if (sizeof(JDIMENSION) != 4)
706
0
    return 0;
707
9.17k
  if (sizeof(FAST_FLOAT) != 4)
708
0
    return 0;
709
710
9.17k
  if (simd_support & JSIMD_SSE2)
711
9.17k
    return 1;
712
713
0
  return 0;
714
9.17k
}
715
716
GLOBAL(void)
717
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
718
               DCTELEM *workspace)
719
82.4M
{
720
82.4M
  if (simd_support == ~0U)
721
0
    init_simd();
722
723
82.4M
  if (simd_support & JSIMD_AVX2)
724
82.4M
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
725
0
  else
726
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
727
82.4M
}
728
729
GLOBAL(void)
730
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
731
                     FAST_FLOAT *workspace)
732
16.7M
{
733
16.7M
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
734
16.7M
}
735
736
GLOBAL(int)
737
jsimd_can_fdct_islow(void)
738
26.6k
{
739
26.6k
  init_simd();
740
741
  /* The code is optimised for these values only */
742
26.6k
  if (DCTSIZE != 8)
743
0
    return 0;
744
26.6k
  if (sizeof(DCTELEM) != 2)
745
0
    return 0;
746
747
26.6k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
748
26.6k
    return 1;
749
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
750
0
    return 1;
751
752
0
  return 0;
753
0
}
754
755
GLOBAL(int)
756
jsimd_can_fdct_ifast(void)
757
33.0k
{
758
33.0k
  init_simd();
759
760
  /* The code is optimised for these values only */
761
33.0k
  if (DCTSIZE != 8)
762
0
    return 0;
763
33.0k
  if (sizeof(DCTELEM) != 2)
764
0
    return 0;
765
766
33.0k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
767
33.0k
    return 1;
768
769
0
  return 0;
770
33.0k
}
771
772
GLOBAL(int)
773
jsimd_can_fdct_float(void)
774
9.17k
{
775
9.17k
  init_simd();
776
777
  /* The code is optimised for these values only */
778
9.17k
  if (DCTSIZE != 8)
779
0
    return 0;
780
9.17k
  if (sizeof(FAST_FLOAT) != 4)
781
0
    return 0;
782
783
9.17k
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
784
9.17k
    return 1;
785
786
0
  return 0;
787
9.17k
}
788
789
GLOBAL(void)
790
jsimd_fdct_islow(DCTELEM *data)
791
39.8M
{
792
39.8M
  if (simd_support == ~0U)
793
0
    init_simd();
794
795
39.8M
  if (simd_support & JSIMD_AVX2)
796
39.8M
    jsimd_fdct_islow_avx2(data);
797
0
  else
798
0
    jsimd_fdct_islow_sse2(data);
799
39.8M
}
800
801
GLOBAL(void)
802
jsimd_fdct_ifast(DCTELEM *data)
803
42.5M
{
804
42.5M
  jsimd_fdct_ifast_sse2(data);
805
42.5M
}
806
807
GLOBAL(void)
808
jsimd_fdct_float(FAST_FLOAT *data)
809
16.7M
{
810
16.7M
  jsimd_fdct_float_sse(data);
811
16.7M
}
812
813
GLOBAL(int)
814
jsimd_can_quantize(void)
815
59.6k
{
816
59.6k
  init_simd();
817
818
  /* The code is optimised for these values only */
819
59.6k
  if (DCTSIZE != 8)
820
0
    return 0;
821
59.6k
  if (sizeof(JCOEF) != 2)
822
0
    return 0;
823
59.6k
  if (sizeof(DCTELEM) != 2)
824
0
    return 0;
825
826
59.6k
  if (simd_support & JSIMD_AVX2)
827
59.6k
    return 1;
828
0
  if (simd_support & JSIMD_SSE2)
829
0
    return 1;
830
831
0
  return 0;
832
0
}
833
834
GLOBAL(int)
835
jsimd_can_quantize_float(void)
836
9.17k
{
837
9.17k
  init_simd();
838
839
  /* The code is optimised for these values only */
840
9.17k
  if (DCTSIZE != 8)
841
0
    return 0;
842
9.17k
  if (sizeof(JCOEF) != 2)
843
0
    return 0;
844
9.17k
  if (sizeof(FAST_FLOAT) != 4)
845
0
    return 0;
846
847
9.17k
  if (simd_support & JSIMD_SSE2)
848
9.17k
    return 1;
849
850
0
  return 0;
851
9.17k
}
852
853
GLOBAL(void)
854
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
855
82.4M
{
856
82.4M
  if (simd_support == ~0U)
857
0
    init_simd();
858
859
82.4M
  if (simd_support & JSIMD_AVX2)
860
82.4M
    jsimd_quantize_avx2(coef_block, divisors, workspace);
861
0
  else
862
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
863
82.4M
}
864
865
GLOBAL(void)
866
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
867
                     FAST_FLOAT *workspace)
868
16.7M
{
869
16.7M
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
870
16.7M
}
871
872
GLOBAL(int)
873
jsimd_can_idct_2x2(void)
874
779
{
875
779
  init_simd();
876
877
  /* The code is optimised for these values only */
878
779
  if (DCTSIZE != 8)
879
0
    return 0;
880
779
  if (sizeof(JCOEF) != 2)
881
0
    return 0;
882
779
  if (BITS_IN_JSAMPLE != 8)
883
0
    return 0;
884
779
  if (sizeof(JDIMENSION) != 4)
885
0
    return 0;
886
779
  if (sizeof(ISLOW_MULT_TYPE) != 2)
887
0
    return 0;
888
889
779
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
890
779
    return 1;
891
892
0
  return 0;
893
779
}
894
895
GLOBAL(int)
896
jsimd_can_idct_4x4(void)
897
2.65k
{
898
2.65k
  init_simd();
899
900
  /* The code is optimised for these values only */
901
2.65k
  if (DCTSIZE != 8)
902
0
    return 0;
903
2.65k
  if (sizeof(JCOEF) != 2)
904
0
    return 0;
905
2.65k
  if (BITS_IN_JSAMPLE != 8)
906
0
    return 0;
907
2.65k
  if (sizeof(JDIMENSION) != 4)
908
0
    return 0;
909
2.65k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
910
0
    return 0;
911
912
2.65k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
913
2.65k
    return 1;
914
915
0
  return 0;
916
2.65k
}
917
918
GLOBAL(void)
919
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
920
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
921
               JDIMENSION output_col)
922
3.71M
{
923
3.71M
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
924
3.71M
}
925
926
GLOBAL(void)
927
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
928
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
929
               JDIMENSION output_col)
930
4.25M
{
931
4.25M
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
932
4.25M
}
933
934
GLOBAL(int)
935
jsimd_can_idct_islow(void)
936
4.95k
{
937
4.95k
  init_simd();
938
939
  /* The code is optimised for these values only */
940
4.95k
  if (DCTSIZE != 8)
941
0
    return 0;
942
4.95k
  if (sizeof(JCOEF) != 2)
943
0
    return 0;
944
4.95k
  if (BITS_IN_JSAMPLE != 8)
945
0
    return 0;
946
4.95k
  if (sizeof(JDIMENSION) != 4)
947
0
    return 0;
948
4.95k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
949
0
    return 0;
950
951
4.95k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
952
4.95k
    return 1;
953
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
954
0
    return 1;
955
956
0
  return 0;
957
0
}
958
959
GLOBAL(int)
960
jsimd_can_idct_ifast(void)
961
24.2k
{
962
24.2k
  init_simd();
963
964
  /* The code is optimised for these values only */
965
24.2k
  if (DCTSIZE != 8)
966
0
    return 0;
967
24.2k
  if (sizeof(JCOEF) != 2)
968
0
    return 0;
969
24.2k
  if (BITS_IN_JSAMPLE != 8)
970
0
    return 0;
971
24.2k
  if (sizeof(JDIMENSION) != 4)
972
0
    return 0;
973
24.2k
  if (sizeof(IFAST_MULT_TYPE) != 2)
974
0
    return 0;
975
24.2k
  if (IFAST_SCALE_BITS != 2)
976
0
    return 0;
977
978
24.2k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
979
24.2k
    return 1;
980
981
0
  return 0;
982
24.2k
}
983
984
GLOBAL(int)
985
jsimd_can_idct_float(void)
986
0
{
987
0
  init_simd();
988
989
0
  if (DCTSIZE != 8)
990
0
    return 0;
991
0
  if (sizeof(JCOEF) != 2)
992
0
    return 0;
993
0
  if (BITS_IN_JSAMPLE != 8)
994
0
    return 0;
995
0
  if (sizeof(JDIMENSION) != 4)
996
0
    return 0;
997
0
  if (sizeof(FAST_FLOAT) != 4)
998
0
    return 0;
999
0
  if (sizeof(FLOAT_MULT_TYPE) != 4)
1000
0
    return 0;
1001
1002
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1003
0
    return 1;
1004
1005
0
  return 0;
1006
0
}
1007
1008
GLOBAL(void)
1009
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1010
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1011
                 JDIMENSION output_col)
1012
9.41M
{
1013
9.41M
  if (simd_support == ~0U)
1014
0
    init_simd();
1015
1016
9.41M
  if (simd_support & JSIMD_AVX2)
1017
9.41M
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1018
9.41M
                          output_col);
1019
0
  else
1020
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1021
0
                          output_col);
1022
9.41M
}
1023
1024
GLOBAL(void)
1025
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1026
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1027
                 JDIMENSION output_col)
1028
85.1M
{
1029
85.1M
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1030
85.1M
                        output_col);
1031
85.1M
}
1032
1033
GLOBAL(void)
1034
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1035
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036
                 JDIMENSION output_col)
1037
0
{
1038
0
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1039
0
                        output_col);
1040
0
}
1041
1042
GLOBAL(int)
1043
jsimd_can_huff_encode_one_block(void)
1044
79.7k
{
1045
79.7k
  init_simd();
1046
1047
79.7k
  if (DCTSIZE != 8)
1048
0
    return 0;
1049
79.7k
  if (sizeof(JCOEF) != 2)
1050
0
    return 0;
1051
1052
79.7k
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1053
79.7k
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1054
79.7k
    return 1;
1055
1056
0
  return 0;
1057
79.7k
}
1058
1059
GLOBAL(JOCTET *)
1060
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1061
                            int last_dc_val, c_derived_tbl *dctbl,
1062
                            c_derived_tbl *actbl)
1063
95.1M
{
1064
95.1M
  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1065
95.1M
                                          dctbl, actbl);
1066
95.1M
}
1067
1068
GLOBAL(int)
1069
jsimd_can_encode_mcu_AC_first_prepare(void)
1070
47.7k
{
1071
47.7k
  init_simd();
1072
1073
47.7k
  if (DCTSIZE != 8)
1074
0
    return 0;
1075
47.7k
  if (sizeof(JCOEF) != 2)
1076
0
    return 0;
1077
47.7k
  if (SIZEOF_SIZE_T != 8)
1078
0
    return 0;
1079
47.7k
  if (simd_support & JSIMD_SSE2)
1080
47.7k
    return 1;
1081
1082
0
  return 0;
1083
47.7k
}
1084
1085
GLOBAL(void)
1086
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1087
                                  const int *jpeg_natural_order_start, int Sl,
1088
                                  int Al, UJCOEF *values, size_t *zerobits)
1089
215M
{
1090
215M
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1091
215M
                                         Sl, Al, values, zerobits);
1092
215M
}
1093
1094
GLOBAL(int)
1095
jsimd_can_encode_mcu_AC_refine_prepare(void)
1096
35.8k
{
1097
35.8k
  init_simd();
1098
1099
35.8k
  if (DCTSIZE != 8)
1100
0
    return 0;
1101
35.8k
  if (sizeof(JCOEF) != 2)
1102
0
    return 0;
1103
35.8k
  if (SIZEOF_SIZE_T != 8)
1104
0
    return 0;
1105
35.8k
  if (simd_support & JSIMD_SSE2)
1106
35.8k
    return 1;
1107
1108
0
  return 0;
1109
35.8k
}
1110
1111
GLOBAL(int)
1112
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1113
                                   const int *jpeg_natural_order_start, int Sl,
1114
                                   int Al, UJCOEF *absvalues, size_t *bits)
1115
210M
{
1116
210M
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1117
210M
                                                 jpeg_natural_order_start,
1118
210M
                                                 Sl, Al, absvalues, bits);
1119
210M
}