Coverage Report

Created: 2023-06-07 06:03

/src/libjpeg-turbo.2.1.x/simd/x86_64/jsimd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jsimd_x86_64.c
3
 *
4
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
6
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7
 *
8
 * Based on the x86 SIMD extension for IJG JPEG library,
9
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11
 *
12
 * This file contains the interface between the "normal" portions
13
 * of the library and the SIMD implementations when running on a
14
 * 64-bit x86 architecture.
15
 */
16
17
#define JPEG_INTERNALS
18
#include "../../jinclude.h"
19
#include "../../jpeglib.h"
20
#include "../../jsimd.h"
21
#include "../../jdct.h"
22
#include "../../jsimddct.h"
23
#include "../jsimd.h"
24
25
/*
26
 * In the PIC cases, we have no guarantee that constants will keep
27
 * their alignment. This macro allows us to verify it at runtime.
28
 */
29
241k
/*
 * IS_ALIGNED(ptr, order) is nonzero when the address `ptr` is a multiple of
 * 2^order bytes.  Both arguments are parenthesized so that an expression
 * argument (e.g. `base + 4`) is evaluated as a whole before the cast, and
 * the mask is built in size_t so it has the same width as the address.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
33
34
/* SIMD capability bits, declared THREAD_LOCAL.  The initial all-ones value
 * is what init_simd() compares against (~0U) to decide whether detection
 * still has to run; afterwards it holds the jpeg_simd_cpu_support() result,
 * possibly narrowed by the JSIMD_FORCE* environment overrides. */
static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
35
/* Nonzero lets jsimd_can_huff_encode_one_block() report support.
 * init_simd() sets it to 0 when the JSIMD_NOHUFFENC environment variable
 * is "1". */
static THREAD_LOCAL unsigned int simd_huffman = 1;
36
37
/*
38
 * Check what SIMD accelerations are supported.
39
 */
40
LOCAL(void)
41
init_simd(void)
42
541k
{
43
541k
#ifndef NO_GETENV
44
541k
  char env[2] = { 0 };
45
541k
#endif
46
47
541k
  if (simd_support != ~0U)
48
541k
    return;
49
50
13
  simd_support = jpeg_simd_cpu_support();
51
52
13
#ifndef NO_GETENV
53
  /* Force different settings through environment variables */
54
13
  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55
0
    simd_support &= JSIMD_SSE2;
56
13
  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57
0
    simd_support &= JSIMD_AVX2;
58
13
  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
59
0
    simd_support = 0;
60
13
  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
61
0
    simd_huffman = 0;
62
13
#endif
63
13
}
64
65
GLOBAL(int)
66
jsimd_can_rgb_ycc(void)
67
42.0k
{
68
42.0k
  init_simd();
69
70
  /* The code is optimised for these values only */
71
42.0k
  if (BITS_IN_JSAMPLE != 8)
72
0
    return 0;
73
42.0k
  if (sizeof(JDIMENSION) != 4)
74
0
    return 0;
75
42.0k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
76
0
    return 0;
77
78
42.0k
  if ((simd_support & JSIMD_AVX2) &&
79
42.0k
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
80
42.0k
    return 1;
81
0
  if ((simd_support & JSIMD_SSE2) &&
82
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
83
0
    return 1;
84
85
0
  return 0;
86
0
}
87
88
GLOBAL(int)
89
jsimd_can_rgb_gray(void)
90
9.53k
{
91
9.53k
  init_simd();
92
93
  /* The code is optimised for these values only */
94
9.53k
  if (BITS_IN_JSAMPLE != 8)
95
0
    return 0;
96
9.53k
  if (sizeof(JDIMENSION) != 4)
97
0
    return 0;
98
9.53k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99
0
    return 0;
100
101
9.53k
  if ((simd_support & JSIMD_AVX2) &&
102
9.53k
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
103
9.53k
    return 1;
104
0
  if ((simd_support & JSIMD_SSE2) &&
105
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
106
0
    return 1;
107
108
0
  return 0;
109
0
}
110
111
GLOBAL(int)
112
jsimd_can_ycc_rgb(void)
113
4.17k
{
114
4.17k
  init_simd();
115
116
  /* The code is optimised for these values only */
117
4.17k
  if (BITS_IN_JSAMPLE != 8)
118
0
    return 0;
119
4.17k
  if (sizeof(JDIMENSION) != 4)
120
0
    return 0;
121
4.17k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122
0
    return 0;
123
124
4.17k
  if ((simd_support & JSIMD_AVX2) &&
125
4.17k
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
126
4.17k
    return 1;
127
0
  if ((simd_support & JSIMD_SSE2) &&
128
0
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
129
0
    return 1;
130
131
0
  return 0;
132
0
}
133
134
GLOBAL(int)
135
jsimd_can_ycc_rgb565(void)
136
0
{
137
0
  return 0;
138
0
}
139
140
GLOBAL(void)
141
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
143
                      int num_rows)
144
115M
{
145
115M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146
115M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147
148
115M
  if (simd_support == ~0U)
149
0
    init_simd();
150
151
115M
  switch (cinfo->in_color_space) {
152
29.5M
  case JCS_EXT_RGB:
153
29.5M
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
154
29.5M
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
155
29.5M
    break;
156
7.00M
  case JCS_EXT_RGBX:
157
7.00M
  case JCS_EXT_RGBA:
158
7.00M
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159
7.00M
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
160
7.00M
    break;
161
21.7M
  case JCS_EXT_BGR:
162
21.7M
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
163
21.7M
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
164
21.7M
    break;
165
0
  case JCS_EXT_BGRX:
166
14.0M
  case JCS_EXT_BGRA:
167
14.0M
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168
14.0M
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
169
14.0M
    break;
170
15.5M
  case JCS_EXT_XBGR:
171
15.5M
  case JCS_EXT_ABGR:
172
15.5M
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173
15.5M
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
174
15.5M
    break;
175
15.5M
  case JCS_EXT_XRGB:
176
15.5M
  case JCS_EXT_ARGB:
177
15.5M
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178
15.5M
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
179
15.5M
    break;
180
12.1M
  default:
181
12.1M
    avx2fct = jsimd_rgb_ycc_convert_avx2;
182
12.1M
    sse2fct = jsimd_rgb_ycc_convert_sse2;
183
12.1M
    break;
184
115M
  }
185
186
115M
  if (simd_support & JSIMD_AVX2)
187
115M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188
0
  else
189
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190
115M
}
191
192
GLOBAL(void)
193
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
195
                       int num_rows)
196
29.5M
{
197
29.5M
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198
29.5M
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
199
200
29.5M
  if (simd_support == ~0U)
201
0
    init_simd();
202
203
29.5M
  switch (cinfo->in_color_space) {
204
0
  case JCS_EXT_RGB:
205
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
206
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
207
0
    break;
208
0
  case JCS_EXT_RGBX:
209
0
  case JCS_EXT_RGBA:
210
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
211
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
212
0
    break;
213
15.5M
  case JCS_EXT_BGR:
214
15.5M
    avx2fct = jsimd_extbgr_gray_convert_avx2;
215
15.5M
    sse2fct = jsimd_extbgr_gray_convert_sse2;
216
15.5M
    break;
217
0
  case JCS_EXT_BGRX:
218
0
  case JCS_EXT_BGRA:
219
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
220
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
221
0
    break;
222
0
  case JCS_EXT_XBGR:
223
0
  case JCS_EXT_ABGR:
224
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
225
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
226
0
    break;
227
14.0M
  case JCS_EXT_XRGB:
228
14.0M
  case JCS_EXT_ARGB:
229
14.0M
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
230
14.0M
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
231
14.0M
    break;
232
0
  default:
233
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
234
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
235
0
    break;
236
29.5M
  }
237
238
29.5M
  if (simd_support & JSIMD_AVX2)
239
29.5M
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240
0
  else
241
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242
29.5M
}
243
244
GLOBAL(void)
245
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
246
                      JDIMENSION input_row, JSAMPARRAY output_buf,
247
                      int num_rows)
248
5.06M
{
249
5.06M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250
5.06M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251
252
5.06M
  if (simd_support == ~0U)
253
0
    init_simd();
254
255
5.06M
  switch (cinfo->out_color_space) {
256
3.51M
  case JCS_EXT_RGB:
257
3.51M
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
258
3.51M
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
259
3.51M
    break;
260
0
  case JCS_EXT_RGBX:
261
0
  case JCS_EXT_RGBA:
262
0
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
263
0
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
264
0
    break;
265
892k
  case JCS_EXT_BGR:
266
892k
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
267
892k
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
268
892k
    break;
269
370k
  case JCS_EXT_BGRX:
270
370k
  case JCS_EXT_BGRA:
271
370k
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
272
370k
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
273
370k
    break;
274
0
  case JCS_EXT_XBGR:
275
0
  case JCS_EXT_ABGR:
276
0
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
277
0
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
278
0
    break;
279
288k
  case JCS_EXT_XRGB:
280
288k
  case JCS_EXT_ARGB:
281
288k
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
282
288k
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
283
288k
    break;
284
0
  default:
285
0
    avx2fct = jsimd_ycc_rgb_convert_avx2;
286
0
    sse2fct = jsimd_ycc_rgb_convert_sse2;
287
0
    break;
288
5.06M
  }
289
290
5.06M
  if (simd_support & JSIMD_AVX2)
291
5.06M
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292
0
  else
293
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
294
5.06M
}
295
296
GLOBAL(void)
297
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298
                         JDIMENSION input_row, JSAMPARRAY output_buf,
299
                         int num_rows)
300
0
{
301
0
}
302
303
GLOBAL(int)
304
jsimd_can_h2v2_downsample(void)
305
19.0k
{
306
19.0k
  init_simd();
307
308
  /* The code is optimised for these values only */
309
19.0k
  if (BITS_IN_JSAMPLE != 8)
310
0
    return 0;
311
19.0k
  if (sizeof(JDIMENSION) != 4)
312
0
    return 0;
313
314
19.0k
  if (simd_support & JSIMD_AVX2)
315
19.0k
    return 1;
316
0
  if (simd_support & JSIMD_SSE2)
317
0
    return 1;
318
319
0
  return 0;
320
0
}
321
322
GLOBAL(int)
323
jsimd_can_h2v1_downsample(void)
324
22.9k
{
325
22.9k
  init_simd();
326
327
  /* The code is optimised for these values only */
328
22.9k
  if (BITS_IN_JSAMPLE != 8)
329
0
    return 0;
330
22.9k
  if (sizeof(JDIMENSION) != 4)
331
0
    return 0;
332
333
22.9k
  if (simd_support & JSIMD_AVX2)
334
22.9k
    return 1;
335
0
  if (simd_support & JSIMD_SSE2)
336
0
    return 1;
337
338
0
  return 0;
339
0
}
340
341
GLOBAL(void)
342
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
343
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
344
29.5M
{
345
29.5M
  if (simd_support == ~0U)
346
0
    init_simd();
347
348
29.5M
  if (simd_support & JSIMD_AVX2)
349
29.5M
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
350
29.5M
                               compptr->v_samp_factor,
351
29.5M
                               compptr->width_in_blocks, input_data,
352
29.5M
                               output_data);
353
0
  else
354
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355
0
                               compptr->v_samp_factor,
356
0
                               compptr->width_in_blocks, input_data,
357
0
                               output_data);
358
29.5M
}
359
360
GLOBAL(void)
361
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
362
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
363
65.1M
{
364
65.1M
  if (simd_support == ~0U)
365
0
    init_simd();
366
367
65.1M
  if (simd_support & JSIMD_AVX2)
368
65.1M
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
369
65.1M
                               compptr->v_samp_factor,
370
65.1M
                               compptr->width_in_blocks, input_data,
371
65.1M
                               output_data);
372
0
  else
373
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
374
0
                               compptr->v_samp_factor,
375
0
                               compptr->width_in_blocks, input_data,
376
0
                               output_data);
377
65.1M
}
378
379
GLOBAL(int)
380
jsimd_can_h2v2_upsample(void)
381
1.71k
{
382
1.71k
  init_simd();
383
384
  /* The code is optimised for these values only */
385
1.71k
  if (BITS_IN_JSAMPLE != 8)
386
0
    return 0;
387
1.71k
  if (sizeof(JDIMENSION) != 4)
388
0
    return 0;
389
390
1.71k
  if (simd_support & JSIMD_AVX2)
391
1.71k
    return 1;
392
0
  if (simd_support & JSIMD_SSE2)
393
0
    return 1;
394
395
0
  return 0;
396
0
}
397
398
GLOBAL(int)
399
jsimd_can_h2v1_upsample(void)
400
2.19k
{
401
2.19k
  init_simd();
402
403
  /* The code is optimised for these values only */
404
2.19k
  if (BITS_IN_JSAMPLE != 8)
405
0
    return 0;
406
2.19k
  if (sizeof(JDIMENSION) != 4)
407
0
    return 0;
408
409
2.19k
  if (simd_support & JSIMD_AVX2)
410
2.19k
    return 1;
411
0
  if (simd_support & JSIMD_SSE2)
412
0
    return 1;
413
414
0
  return 0;
415
0
}
416
417
GLOBAL(void)
418
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
420
1.25M
{
421
1.25M
  if (simd_support == ~0U)
422
0
    init_simd();
423
424
1.25M
  if (simd_support & JSIMD_AVX2)
425
1.25M
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
426
1.25M
                             input_data, output_data_ptr);
427
0
  else
428
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429
0
                             input_data, output_data_ptr);
430
1.25M
}
431
432
GLOBAL(void)
433
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
434
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
435
3.29M
{
436
3.29M
  if (simd_support == ~0U)
437
0
    init_simd();
438
439
3.29M
  if (simd_support & JSIMD_AVX2)
440
3.29M
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
441
3.29M
                             input_data, output_data_ptr);
442
0
  else
443
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
444
0
                             input_data, output_data_ptr);
445
3.29M
}
446
447
GLOBAL(int)
448
jsimd_can_h2v2_fancy_upsample(void)
449
121
{
450
121
  init_simd();
451
452
  /* The code is optimised for these values only */
453
121
  if (BITS_IN_JSAMPLE != 8)
454
0
    return 0;
455
121
  if (sizeof(JDIMENSION) != 4)
456
0
    return 0;
457
458
121
  if ((simd_support & JSIMD_AVX2) &&
459
121
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
460
121
    return 1;
461
0
  if ((simd_support & JSIMD_SSE2) &&
462
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463
0
    return 1;
464
465
0
  return 0;
466
0
}
467
468
GLOBAL(int)
469
jsimd_can_h2v1_fancy_upsample(void)
470
356
{
471
356
  init_simd();
472
473
  /* The code is optimised for these values only */
474
356
  if (BITS_IN_JSAMPLE != 8)
475
0
    return 0;
476
356
  if (sizeof(JDIMENSION) != 4)
477
0
    return 0;
478
479
356
  if ((simd_support & JSIMD_AVX2) &&
480
356
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
481
356
    return 1;
482
0
  if ((simd_support & JSIMD_SSE2) &&
483
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484
0
    return 1;
485
486
0
  return 0;
487
0
}
488
489
GLOBAL(void)
490
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492
75.2k
{
493
75.2k
  if (simd_support == ~0U)
494
0
    init_simd();
495
496
75.2k
  if (simd_support & JSIMD_AVX2)
497
75.2k
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
498
75.2k
                                   compptr->downsampled_width, input_data,
499
75.2k
                                   output_data_ptr);
500
0
  else
501
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
502
0
                                   compptr->downsampled_width, input_data,
503
0
                                   output_data_ptr);
504
75.2k
}
505
506
GLOBAL(void)
507
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
508
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
509
171k
{
510
171k
  if (simd_support == ~0U)
511
0
    init_simd();
512
513
171k
  if (simd_support & JSIMD_AVX2)
514
171k
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
515
171k
                                   compptr->downsampled_width, input_data,
516
171k
                                   output_data_ptr);
517
0
  else
518
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
519
0
                                   compptr->downsampled_width, input_data,
520
0
                                   output_data_ptr);
521
171k
}
522
523
GLOBAL(int)
524
jsimd_can_h2v2_merged_upsample(void)
525
3.70k
{
526
3.70k
  init_simd();
527
528
  /* The code is optimised for these values only */
529
3.70k
  if (BITS_IN_JSAMPLE != 8)
530
0
    return 0;
531
3.70k
  if (sizeof(JDIMENSION) != 4)
532
0
    return 0;
533
534
3.70k
  if ((simd_support & JSIMD_AVX2) &&
535
3.70k
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
536
3.70k
    return 1;
537
0
  if ((simd_support & JSIMD_SSE2) &&
538
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
539
0
    return 1;
540
541
0
  return 0;
542
0
}
543
544
GLOBAL(int)
545
jsimd_can_h2v1_merged_upsample(void)
546
329
{
547
329
  init_simd();
548
549
  /* The code is optimised for these values only */
550
329
  if (BITS_IN_JSAMPLE != 8)
551
0
    return 0;
552
329
  if (sizeof(JDIMENSION) != 4)
553
0
    return 0;
554
555
329
  if ((simd_support & JSIMD_AVX2) &&
556
329
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
557
329
    return 1;
558
0
  if ((simd_support & JSIMD_SSE2) &&
559
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
560
0
    return 1;
561
562
0
  return 0;
563
0
}
564
565
GLOBAL(void)
566
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
567
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
568
1.26M
{
569
1.26M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
570
1.26M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
571
572
1.26M
  if (simd_support == ~0U)
573
0
    init_simd();
574
575
1.26M
  switch (cinfo->out_color_space) {
576
683k
  case JCS_EXT_RGB:
577
683k
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
578
683k
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
579
683k
    break;
580
0
  case JCS_EXT_RGBX:
581
0
  case JCS_EXT_RGBA:
582
0
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
583
0
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
584
0
    break;
585
423k
  case JCS_EXT_BGR:
586
423k
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
587
423k
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
588
423k
    break;
589
0
  case JCS_EXT_BGRX:
590
0
  case JCS_EXT_BGRA:
591
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
592
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
593
0
    break;
594
0
  case JCS_EXT_XBGR:
595
0
  case JCS_EXT_ABGR:
596
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
597
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
598
0
    break;
599
154k
  case JCS_EXT_XRGB:
600
154k
  case JCS_EXT_ARGB:
601
154k
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
602
154k
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
603
154k
    break;
604
0
  default:
605
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
606
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
607
0
    break;
608
1.26M
  }
609
610
1.26M
  if (simd_support & JSIMD_AVX2)
611
1.26M
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612
0
  else
613
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
614
1.26M
}
615
616
GLOBAL(void)
617
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
618
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
619
957k
{
620
957k
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
621
957k
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
622
623
957k
  if (simd_support == ~0U)
624
0
    init_simd();
625
626
957k
  switch (cinfo->out_color_space) {
627
453k
  case JCS_EXT_RGB:
628
453k
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
629
453k
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
630
453k
    break;
631
0
  case JCS_EXT_RGBX:
632
0
  case JCS_EXT_RGBA:
633
0
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
634
0
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
635
0
    break;
636
394k
  case JCS_EXT_BGR:
637
394k
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
638
394k
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
639
394k
    break;
640
0
  case JCS_EXT_BGRX:
641
0
  case JCS_EXT_BGRA:
642
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
643
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
644
0
    break;
645
0
  case JCS_EXT_XBGR:
646
0
  case JCS_EXT_ABGR:
647
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
648
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
649
0
    break;
650
109k
  case JCS_EXT_XRGB:
651
109k
  case JCS_EXT_ARGB:
652
109k
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
653
109k
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
654
109k
    break;
655
0
  default:
656
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
657
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
658
0
    break;
659
957k
  }
660
661
957k
  if (simd_support & JSIMD_AVX2)
662
957k
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663
0
  else
664
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
665
957k
}
666
667
GLOBAL(int)
668
jsimd_can_convsamp(void)
669
59.6k
{
670
59.6k
  init_simd();
671
672
  /* The code is optimised for these values only */
673
59.6k
  if (DCTSIZE != 8)
674
0
    return 0;
675
59.6k
  if (BITS_IN_JSAMPLE != 8)
676
0
    return 0;
677
59.6k
  if (sizeof(JDIMENSION) != 4)
678
0
    return 0;
679
59.6k
  if (sizeof(DCTELEM) != 2)
680
0
    return 0;
681
682
59.6k
  if (simd_support & JSIMD_AVX2)
683
59.6k
    return 1;
684
0
  if (simd_support & JSIMD_SSE2)
685
0
    return 1;
686
687
0
  return 0;
688
0
}
689
690
GLOBAL(int)
691
jsimd_can_convsamp_float(void)
692
9.17k
{
693
9.17k
  init_simd();
694
695
  /* The code is optimised for these values only */
696
9.17k
  if (DCTSIZE != 8)
697
0
    return 0;
698
9.17k
  if (BITS_IN_JSAMPLE != 8)
699
0
    return 0;
700
9.17k
  if (sizeof(JDIMENSION) != 4)
701
0
    return 0;
702
9.17k
  if (sizeof(FAST_FLOAT) != 4)
703
0
    return 0;
704
705
9.17k
  if (simd_support & JSIMD_SSE2)
706
9.17k
    return 1;
707
708
0
  return 0;
709
9.17k
}
710
711
GLOBAL(void)
712
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
713
               DCTELEM *workspace)
714
82.4M
{
715
82.4M
  if (simd_support == ~0U)
716
0
    init_simd();
717
718
82.4M
  if (simd_support & JSIMD_AVX2)
719
82.4M
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
720
0
  else
721
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
722
82.4M
}
723
724
GLOBAL(void)
725
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
726
                     FAST_FLOAT *workspace)
727
16.7M
{
728
16.7M
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
729
16.7M
}
730
731
GLOBAL(int)
732
jsimd_can_fdct_islow(void)
733
26.6k
{
734
26.6k
  init_simd();
735
736
  /* The code is optimised for these values only */
737
26.6k
  if (DCTSIZE != 8)
738
0
    return 0;
739
26.6k
  if (sizeof(DCTELEM) != 2)
740
0
    return 0;
741
742
26.6k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
743
26.6k
    return 1;
744
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
745
0
    return 1;
746
747
0
  return 0;
748
0
}
749
750
GLOBAL(int)
751
jsimd_can_fdct_ifast(void)
752
33.0k
{
753
33.0k
  init_simd();
754
755
  /* The code is optimised for these values only */
756
33.0k
  if (DCTSIZE != 8)
757
0
    return 0;
758
33.0k
  if (sizeof(DCTELEM) != 2)
759
0
    return 0;
760
761
33.0k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
762
33.0k
    return 1;
763
764
0
  return 0;
765
33.0k
}
766
767
GLOBAL(int)
768
jsimd_can_fdct_float(void)
769
9.17k
{
770
9.17k
  init_simd();
771
772
  /* The code is optimised for these values only */
773
9.17k
  if (DCTSIZE != 8)
774
0
    return 0;
775
9.17k
  if (sizeof(FAST_FLOAT) != 4)
776
0
    return 0;
777
778
9.17k
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
779
9.17k
    return 1;
780
781
0
  return 0;
782
9.17k
}
783
784
GLOBAL(void)
785
jsimd_fdct_islow(DCTELEM *data)
786
39.8M
{
787
39.8M
  if (simd_support == ~0U)
788
0
    init_simd();
789
790
39.8M
  if (simd_support & JSIMD_AVX2)
791
39.8M
    jsimd_fdct_islow_avx2(data);
792
0
  else
793
0
    jsimd_fdct_islow_sse2(data);
794
39.8M
}
795
796
GLOBAL(void)
797
jsimd_fdct_ifast(DCTELEM *data)
798
42.5M
{
799
42.5M
  jsimd_fdct_ifast_sse2(data);
800
42.5M
}
801
802
GLOBAL(void)
803
jsimd_fdct_float(FAST_FLOAT *data)
804
16.7M
{
805
16.7M
  jsimd_fdct_float_sse(data);
806
16.7M
}
807
808
GLOBAL(int)
809
jsimd_can_quantize(void)
810
59.6k
{
811
59.6k
  init_simd();
812
813
  /* The code is optimised for these values only */
814
59.6k
  if (DCTSIZE != 8)
815
0
    return 0;
816
59.6k
  if (sizeof(JCOEF) != 2)
817
0
    return 0;
818
59.6k
  if (sizeof(DCTELEM) != 2)
819
0
    return 0;
820
821
59.6k
  if (simd_support & JSIMD_AVX2)
822
59.6k
    return 1;
823
0
  if (simd_support & JSIMD_SSE2)
824
0
    return 1;
825
826
0
  return 0;
827
0
}
828
829
GLOBAL(int)
830
jsimd_can_quantize_float(void)
831
9.17k
{
832
9.17k
  init_simd();
833
834
  /* The code is optimised for these values only */
835
9.17k
  if (DCTSIZE != 8)
836
0
    return 0;
837
9.17k
  if (sizeof(JCOEF) != 2)
838
0
    return 0;
839
9.17k
  if (sizeof(FAST_FLOAT) != 4)
840
0
    return 0;
841
842
9.17k
  if (simd_support & JSIMD_SSE2)
843
9.17k
    return 1;
844
845
0
  return 0;
846
9.17k
}
847
848
GLOBAL(void)
849
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
850
82.4M
{
851
82.4M
  if (simd_support == ~0U)
852
0
    init_simd();
853
854
82.4M
  if (simd_support & JSIMD_AVX2)
855
82.4M
    jsimd_quantize_avx2(coef_block, divisors, workspace);
856
0
  else
857
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
858
82.4M
}
859
860
GLOBAL(void)
861
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
862
                     FAST_FLOAT *workspace)
863
16.7M
{
864
16.7M
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865
16.7M
}
866
867
GLOBAL(int)
868
jsimd_can_idct_2x2(void)
869
779
{
870
779
  init_simd();
871
872
  /* The code is optimised for these values only */
873
779
  if (DCTSIZE != 8)
874
0
    return 0;
875
779
  if (sizeof(JCOEF) != 2)
876
0
    return 0;
877
779
  if (BITS_IN_JSAMPLE != 8)
878
0
    return 0;
879
779
  if (sizeof(JDIMENSION) != 4)
880
0
    return 0;
881
779
  if (sizeof(ISLOW_MULT_TYPE) != 2)
882
0
    return 0;
883
884
779
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
885
779
    return 1;
886
887
0
  return 0;
888
779
}
889
890
GLOBAL(int)
891
jsimd_can_idct_4x4(void)
892
2.65k
{
893
2.65k
  init_simd();
894
895
  /* The code is optimised for these values only */
896
2.65k
  if (DCTSIZE != 8)
897
0
    return 0;
898
2.65k
  if (sizeof(JCOEF) != 2)
899
0
    return 0;
900
2.65k
  if (BITS_IN_JSAMPLE != 8)
901
0
    return 0;
902
2.65k
  if (sizeof(JDIMENSION) != 4)
903
0
    return 0;
904
2.65k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
905
0
    return 0;
906
907
2.65k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
908
2.65k
    return 1;
909
910
0
  return 0;
911
2.65k
}
912
913
GLOBAL(void)
914
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
915
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
916
               JDIMENSION output_col)
917
3.71M
{
918
3.71M
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
919
3.71M
}
920
921
GLOBAL(void)
922
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
923
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
924
               JDIMENSION output_col)
925
4.25M
{
926
4.25M
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927
4.25M
}
928
929
GLOBAL(int)
930
jsimd_can_idct_islow(void)
931
4.95k
{
932
4.95k
  init_simd();
933
934
  /* The code is optimised for these values only */
935
4.95k
  if (DCTSIZE != 8)
936
0
    return 0;
937
4.95k
  if (sizeof(JCOEF) != 2)
938
0
    return 0;
939
4.95k
  if (BITS_IN_JSAMPLE != 8)
940
0
    return 0;
941
4.95k
  if (sizeof(JDIMENSION) != 4)
942
0
    return 0;
943
4.95k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
944
0
    return 0;
945
946
4.95k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
947
4.95k
    return 1;
948
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
949
0
    return 1;
950
951
0
  return 0;
952
0
}
953
954
GLOBAL(int)
955
jsimd_can_idct_ifast(void)
956
24.2k
{
957
24.2k
  init_simd();
958
959
  /* The code is optimised for these values only */
960
24.2k
  if (DCTSIZE != 8)
961
0
    return 0;
962
24.2k
  if (sizeof(JCOEF) != 2)
963
0
    return 0;
964
24.2k
  if (BITS_IN_JSAMPLE != 8)
965
0
    return 0;
966
24.2k
  if (sizeof(JDIMENSION) != 4)
967
0
    return 0;
968
24.2k
  if (sizeof(IFAST_MULT_TYPE) != 2)
969
0
    return 0;
970
24.2k
  if (IFAST_SCALE_BITS != 2)
971
0
    return 0;
972
973
24.2k
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
974
24.2k
    return 1;
975
976
0
  return 0;
977
24.2k
}
978
979
GLOBAL(int)
980
jsimd_can_idct_float(void)
981
0
{
982
0
  init_simd();
983
984
0
  if (DCTSIZE != 8)
985
0
    return 0;
986
0
  if (sizeof(JCOEF) != 2)
987
0
    return 0;
988
0
  if (BITS_IN_JSAMPLE != 8)
989
0
    return 0;
990
0
  if (sizeof(JDIMENSION) != 4)
991
0
    return 0;
992
0
  if (sizeof(FAST_FLOAT) != 4)
993
0
    return 0;
994
0
  if (sizeof(FLOAT_MULT_TYPE) != 4)
995
0
    return 0;
996
997
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
998
0
    return 1;
999
1000
0
  return 0;
1001
0
}
1002
1003
GLOBAL(void)
1004
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1005
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1006
                 JDIMENSION output_col)
1007
9.41M
{
1008
9.41M
  if (simd_support == ~0U)
1009
0
    init_simd();
1010
1011
9.41M
  if (simd_support & JSIMD_AVX2)
1012
9.41M
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1013
9.41M
                          output_col);
1014
0
  else
1015
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1016
0
                          output_col);
1017
9.41M
}
1018
1019
GLOBAL(void)
1020
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1021
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1022
                 JDIMENSION output_col)
1023
85.1M
{
1024
85.1M
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1025
85.1M
                        output_col);
1026
85.1M
}
1027
1028
GLOBAL(void)
1029
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1030
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1031
                 JDIMENSION output_col)
1032
0
{
1033
0
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1034
0
                        output_col);
1035
0
}
1036
1037
GLOBAL(int)
1038
jsimd_can_huff_encode_one_block(void)
1039
79.7k
{
1040
79.7k
  init_simd();
1041
1042
79.7k
  if (DCTSIZE != 8)
1043
0
    return 0;
1044
79.7k
  if (sizeof(JCOEF) != 2)
1045
0
    return 0;
1046
1047
79.7k
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1048
79.7k
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1049
79.7k
    return 1;
1050
1051
0
  return 0;
1052
79.7k
}
1053
1054
/*
 * Forward unconditionally to the SSE2 huff_encode_one_block routine and
 * return whatever pointer it returns.
 */
GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
                            int last_dc_val, c_derived_tbl *dctbl,
                            c_derived_tbl *actbl)
{
  JOCTET *result;

  result = jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
                                            dctbl, actbl);
  return result;
}
1062
1063
GLOBAL(int)
1064
jsimd_can_encode_mcu_AC_first_prepare(void)
1065
140k
{
1066
140k
  init_simd();
1067
1068
140k
  if (DCTSIZE != 8)
1069
0
    return 0;
1070
140k
  if (sizeof(JCOEF) != 2)
1071
0
    return 0;
1072
140k
  if (simd_support & JSIMD_SSE2)
1073
140k
    return 1;
1074
1075
0
  return 0;
1076
140k
}
1077
1078
GLOBAL(void)
1079
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1080
                                  const int *jpeg_natural_order_start, int Sl,
1081
                                  int Al, UJCOEF *values, size_t *zerobits)
1082
215M
{
1083
215M
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1084
215M
                                         Sl, Al, values, zerobits);
1085
215M
}
1086
1087
GLOBAL(int)
1088
jsimd_can_encode_mcu_AC_refine_prepare(void)
1089
108k
{
1090
108k
  init_simd();
1091
1092
108k
  if (DCTSIZE != 8)
1093
0
    return 0;
1094
108k
  if (sizeof(JCOEF) != 2)
1095
0
    return 0;
1096
108k
  if (simd_support & JSIMD_SSE2)
1097
108k
    return 1;
1098
1099
0
  return 0;
1100
108k
}
1101
1102
GLOBAL(int)
1103
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1104
                                   const int *jpeg_natural_order_start, int Sl,
1105
                                   int Al, UJCOEF *absvalues, size_t *bits)
1106
210M
{
1107
210M
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1108
210M
                                                 jpeg_natural_order_start,
1109
210M
                                                 Sl, Al, absvalues, bits);
1110
210M
}