Coverage Report

Created: 2022-11-14 06:33

/src/libjpeg-turbo/simd/x86_64/jsimd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jsimd_x86_64.c
3
 *
4
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
6
 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
7
 *
8
 * Based on the x86 SIMD extension for IJG JPEG library,
9
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11
 *
12
 * This file contains the interface between the "normal" portions
13
 * of the library and the SIMD implementations when running on a
14
 * 64-bit x86 architecture.
15
 */
16
17
#define JPEG_INTERNALS
18
#include "../../jinclude.h"
19
#include "../../jpeglib.h"
20
#include "../../jsimd.h"
21
#include "../../jdct.h"
22
#include "../../jsimddct.h"
23
#include "../jsimd.h"
24
25
/*
26
 * In the PIC cases, we have no guarantee that constants will keep
27
 * their alignment. This macro allows us to verify it at runtime.
28
 */
29
69.1k
/*
 * IS_ALIGNED(ptr, order) is nonzero when the address denoted by ptr is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that an
 * expression such as `base + n` is converted as a whole, and the mask is
 * built in size_t rather than int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
33
34
/* Capability bitmask; the all-ones value is what init_simd() treats as
 * "not initialized yet". */
static THREAD_LOCAL unsigned int simd_support = ~0U;
/* Nonzero unless init_simd() clears it because JSIMD_NOHUFFENC is "1". */
static THREAD_LOCAL unsigned int simd_huffman = 1;
36
37
/*
38
 * Check what SIMD accelerations are supported.
39
 */
40
LOCAL(void)
41
init_simd(void)
42
72.2k
{
43
72.2k
#ifndef NO_GETENV
44
72.2k
  char env[2] = { 0 };
45
72.2k
#endif
46
47
72.2k
  if (simd_support != ~0U)
48
72.2k
    return;
49
50
1
  simd_support = jpeg_simd_cpu_support();
51
52
1
#ifndef NO_GETENV
53
  /* Force different settings through environment variables */
54
1
  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55
0
    simd_support &= JSIMD_SSE2;
56
1
  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57
0
    simd_support &= JSIMD_AVX2;
58
1
  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
59
0
    simd_support = 0;
60
1
  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
61
0
    simd_huffman = 0;
62
1
#endif
63
1
}
64
65
GLOBAL(int)
66
jsimd_can_rgb_ycc(void)
67
0
{
68
0
  init_simd();
69
70
  /* The code is optimised for these values only */
71
0
  if (BITS_IN_JSAMPLE != 8)
72
0
    return 0;
73
0
  if (sizeof(JDIMENSION) != 4)
74
0
    return 0;
75
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
76
0
    return 0;
77
78
0
  if ((simd_support & JSIMD_AVX2) &&
79
0
      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
80
0
    return 1;
81
0
  if ((simd_support & JSIMD_SSE2) &&
82
0
      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
83
0
    return 1;
84
85
0
  return 0;
86
0
}
87
88
GLOBAL(int)
89
jsimd_can_rgb_gray(void)
90
0
{
91
0
  init_simd();
92
93
  /* The code is optimised for these values only */
94
0
  if (BITS_IN_JSAMPLE != 8)
95
0
    return 0;
96
0
  if (sizeof(JDIMENSION) != 4)
97
0
    return 0;
98
0
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99
0
    return 0;
100
101
0
  if ((simd_support & JSIMD_AVX2) &&
102
0
      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
103
0
    return 1;
104
0
  if ((simd_support & JSIMD_SSE2) &&
105
0
      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
106
0
    return 1;
107
108
0
  return 0;
109
0
}
110
111
GLOBAL(int)
112
jsimd_can_ycc_rgb(void)
113
5.13k
{
114
5.13k
  init_simd();
115
116
  /* The code is optimised for these values only */
117
5.13k
  if (BITS_IN_JSAMPLE != 8)
118
0
    return 0;
119
5.13k
  if (sizeof(JDIMENSION) != 4)
120
0
    return 0;
121
5.13k
  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122
0
    return 0;
123
124
5.13k
  if ((simd_support & JSIMD_AVX2) &&
125
5.13k
      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
126
5.13k
    return 1;
127
0
  if ((simd_support & JSIMD_SSE2) &&
128
0
      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
129
0
    return 1;
130
131
0
  return 0;
132
0
}
133
134
GLOBAL(int)
135
jsimd_can_ycc_rgb565(void)
136
0
{
137
0
  return 0;
138
0
}
139
140
GLOBAL(void)
141
jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142
                      JSAMPIMAGE output_buf, JDIMENSION output_row,
143
                      int num_rows)
144
0
{
145
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147
148
0
  switch (cinfo->in_color_space) {
149
0
  case JCS_EXT_RGB:
150
0
    avx2fct = jsimd_extrgb_ycc_convert_avx2;
151
0
    sse2fct = jsimd_extrgb_ycc_convert_sse2;
152
0
    break;
153
0
  case JCS_EXT_RGBX:
154
0
  case JCS_EXT_RGBA:
155
0
    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
156
0
    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
157
0
    break;
158
0
  case JCS_EXT_BGR:
159
0
    avx2fct = jsimd_extbgr_ycc_convert_avx2;
160
0
    sse2fct = jsimd_extbgr_ycc_convert_sse2;
161
0
    break;
162
0
  case JCS_EXT_BGRX:
163
0
  case JCS_EXT_BGRA:
164
0
    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
165
0
    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
166
0
    break;
167
0
  case JCS_EXT_XBGR:
168
0
  case JCS_EXT_ABGR:
169
0
    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
170
0
    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
171
0
    break;
172
0
  case JCS_EXT_XRGB:
173
0
  case JCS_EXT_ARGB:
174
0
    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
175
0
    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
176
0
    break;
177
0
  default:
178
0
    avx2fct = jsimd_rgb_ycc_convert_avx2;
179
0
    sse2fct = jsimd_rgb_ycc_convert_sse2;
180
0
    break;
181
0
  }
182
183
0
  if (simd_support & JSIMD_AVX2)
184
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
185
0
  else
186
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
187
0
}
188
189
GLOBAL(void)
190
jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
191
                       JSAMPIMAGE output_buf, JDIMENSION output_row,
192
                       int num_rows)
193
0
{
194
0
  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
195
0
  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196
197
0
  switch (cinfo->in_color_space) {
198
0
  case JCS_EXT_RGB:
199
0
    avx2fct = jsimd_extrgb_gray_convert_avx2;
200
0
    sse2fct = jsimd_extrgb_gray_convert_sse2;
201
0
    break;
202
0
  case JCS_EXT_RGBX:
203
0
  case JCS_EXT_RGBA:
204
0
    avx2fct = jsimd_extrgbx_gray_convert_avx2;
205
0
    sse2fct = jsimd_extrgbx_gray_convert_sse2;
206
0
    break;
207
0
  case JCS_EXT_BGR:
208
0
    avx2fct = jsimd_extbgr_gray_convert_avx2;
209
0
    sse2fct = jsimd_extbgr_gray_convert_sse2;
210
0
    break;
211
0
  case JCS_EXT_BGRX:
212
0
  case JCS_EXT_BGRA:
213
0
    avx2fct = jsimd_extbgrx_gray_convert_avx2;
214
0
    sse2fct = jsimd_extbgrx_gray_convert_sse2;
215
0
    break;
216
0
  case JCS_EXT_XBGR:
217
0
  case JCS_EXT_ABGR:
218
0
    avx2fct = jsimd_extxbgr_gray_convert_avx2;
219
0
    sse2fct = jsimd_extxbgr_gray_convert_sse2;
220
0
    break;
221
0
  case JCS_EXT_XRGB:
222
0
  case JCS_EXT_ARGB:
223
0
    avx2fct = jsimd_extxrgb_gray_convert_avx2;
224
0
    sse2fct = jsimd_extxrgb_gray_convert_sse2;
225
0
    break;
226
0
  default:
227
0
    avx2fct = jsimd_rgb_gray_convert_avx2;
228
0
    sse2fct = jsimd_rgb_gray_convert_sse2;
229
0
    break;
230
0
  }
231
232
0
  if (simd_support & JSIMD_AVX2)
233
0
    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
234
0
  else
235
0
    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
236
0
}
237
238
GLOBAL(void)
239
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
240
                      JDIMENSION input_row, JSAMPARRAY output_buf,
241
                      int num_rows)
242
10.2M
{
243
10.2M
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
244
10.2M
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
245
246
10.2M
  switch (cinfo->out_color_space) {
247
0
  case JCS_EXT_RGB:
248
0
    avx2fct = jsimd_ycc_extrgb_convert_avx2;
249
0
    sse2fct = jsimd_ycc_extrgb_convert_sse2;
250
0
    break;
251
0
  case JCS_EXT_RGBX:
252
0
  case JCS_EXT_RGBA:
253
0
    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
254
0
    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
255
0
    break;
256
0
  case JCS_EXT_BGR:
257
0
    avx2fct = jsimd_ycc_extbgr_convert_avx2;
258
0
    sse2fct = jsimd_ycc_extbgr_convert_sse2;
259
0
    break;
260
0
  case JCS_EXT_BGRX:
261
0
  case JCS_EXT_BGRA:
262
0
    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
263
0
    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
264
0
    break;
265
0
  case JCS_EXT_XBGR:
266
0
  case JCS_EXT_ABGR:
267
0
    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
268
0
    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
269
0
    break;
270
0
  case JCS_EXT_XRGB:
271
0
  case JCS_EXT_ARGB:
272
0
    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
273
0
    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
274
0
    break;
275
10.2M
  default:
276
10.2M
    avx2fct = jsimd_ycc_rgb_convert_avx2;
277
10.2M
    sse2fct = jsimd_ycc_rgb_convert_sse2;
278
10.2M
    break;
279
10.2M
  }
280
281
10.2M
  if (simd_support & JSIMD_AVX2)
282
10.2M
    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
283
0
  else
284
0
    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
285
10.2M
}
286
287
GLOBAL(void)
288
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
289
                         JDIMENSION input_row, JSAMPARRAY output_buf,
290
                         int num_rows)
291
0
{
292
0
}
293
294
GLOBAL(int)
295
jsimd_can_h2v2_downsample(void)
296
0
{
297
0
  init_simd();
298
299
  /* The code is optimised for these values only */
300
0
  if (BITS_IN_JSAMPLE != 8)
301
0
    return 0;
302
0
  if (sizeof(JDIMENSION) != 4)
303
0
    return 0;
304
305
0
  if (simd_support & JSIMD_AVX2)
306
0
    return 1;
307
0
  if (simd_support & JSIMD_SSE2)
308
0
    return 1;
309
310
0
  return 0;
311
0
}
312
313
GLOBAL(int)
314
jsimd_can_h2v1_downsample(void)
315
0
{
316
0
  init_simd();
317
318
  /* The code is optimised for these values only */
319
0
  if (BITS_IN_JSAMPLE != 8)
320
0
    return 0;
321
0
  if (sizeof(JDIMENSION) != 4)
322
0
    return 0;
323
324
0
  if (simd_support & JSIMD_AVX2)
325
0
    return 1;
326
0
  if (simd_support & JSIMD_SSE2)
327
0
    return 1;
328
329
0
  return 0;
330
0
}
331
332
GLOBAL(void)
333
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
334
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
335
0
{
336
0
  if (simd_support & JSIMD_AVX2)
337
0
    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
338
0
                               compptr->v_samp_factor,
339
0
                               compptr->width_in_blocks, input_data,
340
0
                               output_data);
341
0
  else
342
0
    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
343
0
                               compptr->v_samp_factor,
344
0
                               compptr->width_in_blocks, input_data,
345
0
                               output_data);
346
0
}
347
348
GLOBAL(void)
349
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
350
                      JSAMPARRAY input_data, JSAMPARRAY output_data)
351
0
{
352
0
  if (simd_support & JSIMD_AVX2)
353
0
    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
354
0
                               compptr->v_samp_factor,
355
0
                               compptr->width_in_blocks, input_data,
356
0
                               output_data);
357
0
  else
358
0
    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
359
0
                               compptr->v_samp_factor,
360
0
                               compptr->width_in_blocks, input_data,
361
0
                               output_data);
362
0
}
363
364
GLOBAL(int)
365
jsimd_can_h2v2_upsample(void)
366
1.39k
{
367
1.39k
  init_simd();
368
369
  /* The code is optimised for these values only */
370
1.39k
  if (BITS_IN_JSAMPLE != 8)
371
0
    return 0;
372
1.39k
  if (sizeof(JDIMENSION) != 4)
373
0
    return 0;
374
375
1.39k
  if (simd_support & JSIMD_AVX2)
376
1.39k
    return 1;
377
0
  if (simd_support & JSIMD_SSE2)
378
0
    return 1;
379
380
0
  return 0;
381
0
}
382
383
GLOBAL(int)
384
jsimd_can_h2v1_upsample(void)
385
1.75k
{
386
1.75k
  init_simd();
387
388
  /* The code is optimised for these values only */
389
1.75k
  if (BITS_IN_JSAMPLE != 8)
390
0
    return 0;
391
1.75k
  if (sizeof(JDIMENSION) != 4)
392
0
    return 0;
393
394
1.75k
  if (simd_support & JSIMD_AVX2)
395
1.75k
    return 1;
396
0
  if (simd_support & JSIMD_SSE2)
397
0
    return 1;
398
399
0
  return 0;
400
0
}
401
402
GLOBAL(void)
403
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
404
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
405
1.20M
{
406
1.20M
  if (simd_support & JSIMD_AVX2)
407
1.20M
    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
408
1.20M
                             input_data, output_data_ptr);
409
0
  else
410
0
    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
411
0
                             input_data, output_data_ptr);
412
1.20M
}
413
414
GLOBAL(void)
415
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
416
                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
417
5.01M
{
418
5.01M
  if (simd_support & JSIMD_AVX2)
419
5.01M
    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
420
5.01M
                             input_data, output_data_ptr);
421
0
  else
422
0
    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
423
0
                             input_data, output_data_ptr);
424
5.01M
}
425
426
GLOBAL(int)
427
jsimd_can_h2v2_fancy_upsample(void)
428
3.69k
{
429
3.69k
  init_simd();
430
431
  /* The code is optimised for these values only */
432
3.69k
  if (BITS_IN_JSAMPLE != 8)
433
0
    return 0;
434
3.69k
  if (sizeof(JDIMENSION) != 4)
435
0
    return 0;
436
437
3.69k
  if ((simd_support & JSIMD_AVX2) &&
438
3.69k
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
439
3.69k
    return 1;
440
0
  if ((simd_support & JSIMD_SSE2) &&
441
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
442
0
    return 1;
443
444
0
  return 0;
445
0
}
446
447
GLOBAL(int)
448
jsimd_can_h2v1_fancy_upsample(void)
449
547
{
450
547
  init_simd();
451
452
  /* The code is optimised for these values only */
453
547
  if (BITS_IN_JSAMPLE != 8)
454
0
    return 0;
455
547
  if (sizeof(JDIMENSION) != 4)
456
0
    return 0;
457
458
547
  if ((simd_support & JSIMD_AVX2) &&
459
547
      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
460
547
    return 1;
461
0
  if ((simd_support & JSIMD_SSE2) &&
462
0
      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463
0
    return 1;
464
465
0
  return 0;
466
0
}
467
468
GLOBAL(void)
469
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
470
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
471
5.37M
{
472
5.37M
  if (simd_support & JSIMD_AVX2)
473
5.37M
    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
474
5.37M
                                   compptr->downsampled_width, input_data,
475
5.37M
                                   output_data_ptr);
476
0
  else
477
0
    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
478
0
                                   compptr->downsampled_width, input_data,
479
0
                                   output_data_ptr);
480
5.37M
}
481
482
GLOBAL(void)
483
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
484
                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
485
168k
{
486
168k
  if (simd_support & JSIMD_AVX2)
487
168k
    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
488
168k
                                   compptr->downsampled_width, input_data,
489
168k
                                   output_data_ptr);
490
0
  else
491
0
    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
492
0
                                   compptr->downsampled_width, input_data,
493
0
                                   output_data_ptr);
494
168k
}
495
496
GLOBAL(int)
497
jsimd_can_h2v2_merged_upsample(void)
498
0
{
499
0
  init_simd();
500
501
  /* The code is optimised for these values only */
502
0
  if (BITS_IN_JSAMPLE != 8)
503
0
    return 0;
504
0
  if (sizeof(JDIMENSION) != 4)
505
0
    return 0;
506
507
0
  if ((simd_support & JSIMD_AVX2) &&
508
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
509
0
    return 1;
510
0
  if ((simd_support & JSIMD_SSE2) &&
511
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
512
0
    return 1;
513
514
0
  return 0;
515
0
}
516
517
GLOBAL(int)
518
jsimd_can_h2v1_merged_upsample(void)
519
0
{
520
0
  init_simd();
521
522
  /* The code is optimised for these values only */
523
0
  if (BITS_IN_JSAMPLE != 8)
524
0
    return 0;
525
0
  if (sizeof(JDIMENSION) != 4)
526
0
    return 0;
527
528
0
  if ((simd_support & JSIMD_AVX2) &&
529
0
      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
530
0
    return 1;
531
0
  if ((simd_support & JSIMD_SSE2) &&
532
0
      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
533
0
    return 1;
534
535
0
  return 0;
536
0
}
537
538
GLOBAL(void)
539
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
540
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
541
0
{
542
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
543
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
544
545
0
  switch (cinfo->out_color_space) {
546
0
  case JCS_EXT_RGB:
547
0
    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
548
0
    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
549
0
    break;
550
0
  case JCS_EXT_RGBX:
551
0
  case JCS_EXT_RGBA:
552
0
    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
553
0
    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
554
0
    break;
555
0
  case JCS_EXT_BGR:
556
0
    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
557
0
    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
558
0
    break;
559
0
  case JCS_EXT_BGRX:
560
0
  case JCS_EXT_BGRA:
561
0
    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
562
0
    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
563
0
    break;
564
0
  case JCS_EXT_XBGR:
565
0
  case JCS_EXT_ABGR:
566
0
    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
567
0
    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
568
0
    break;
569
0
  case JCS_EXT_XRGB:
570
0
  case JCS_EXT_ARGB:
571
0
    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
572
0
    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
573
0
    break;
574
0
  default:
575
0
    avx2fct = jsimd_h2v2_merged_upsample_avx2;
576
0
    sse2fct = jsimd_h2v2_merged_upsample_sse2;
577
0
    break;
578
0
  }
579
580
0
  if (simd_support & JSIMD_AVX2)
581
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
582
0
  else
583
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
584
0
}
585
586
GLOBAL(void)
587
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
588
                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
589
0
{
590
0
  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
591
0
  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
592
593
0
  switch (cinfo->out_color_space) {
594
0
  case JCS_EXT_RGB:
595
0
    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
596
0
    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
597
0
    break;
598
0
  case JCS_EXT_RGBX:
599
0
  case JCS_EXT_RGBA:
600
0
    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
601
0
    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
602
0
    break;
603
0
  case JCS_EXT_BGR:
604
0
    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
605
0
    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
606
0
    break;
607
0
  case JCS_EXT_BGRX:
608
0
  case JCS_EXT_BGRA:
609
0
    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
610
0
    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
611
0
    break;
612
0
  case JCS_EXT_XBGR:
613
0
  case JCS_EXT_ABGR:
614
0
    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
615
0
    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
616
0
    break;
617
0
  case JCS_EXT_XRGB:
618
0
  case JCS_EXT_ARGB:
619
0
    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
620
0
    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
621
0
    break;
622
0
  default:
623
0
    avx2fct = jsimd_h2v1_merged_upsample_avx2;
624
0
    sse2fct = jsimd_h2v1_merged_upsample_sse2;
625
0
    break;
626
0
  }
627
628
0
  if (simd_support & JSIMD_AVX2)
629
0
    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
630
0
  else
631
0
    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
632
0
}
633
634
GLOBAL(int)
635
jsimd_can_convsamp(void)
636
0
{
637
0
  init_simd();
638
639
  /* The code is optimised for these values only */
640
0
  if (DCTSIZE != 8)
641
0
    return 0;
642
0
  if (BITS_IN_JSAMPLE != 8)
643
0
    return 0;
644
0
  if (sizeof(JDIMENSION) != 4)
645
0
    return 0;
646
0
  if (sizeof(DCTELEM) != 2)
647
0
    return 0;
648
649
0
  if (simd_support & JSIMD_AVX2)
650
0
    return 1;
651
0
  if (simd_support & JSIMD_SSE2)
652
0
    return 1;
653
654
0
  return 0;
655
0
}
656
657
GLOBAL(int)
658
jsimd_can_convsamp_float(void)
659
0
{
660
0
  init_simd();
661
662
  /* The code is optimised for these values only */
663
0
  if (DCTSIZE != 8)
664
0
    return 0;
665
0
  if (BITS_IN_JSAMPLE != 8)
666
0
    return 0;
667
0
  if (sizeof(JDIMENSION) != 4)
668
0
    return 0;
669
0
  if (sizeof(FAST_FLOAT) != 4)
670
0
    return 0;
671
672
0
  if (simd_support & JSIMD_SSE2)
673
0
    return 1;
674
675
0
  return 0;
676
0
}
677
678
GLOBAL(void)
679
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
680
               DCTELEM *workspace)
681
0
{
682
0
  if (simd_support & JSIMD_AVX2)
683
0
    jsimd_convsamp_avx2(sample_data, start_col, workspace);
684
0
  else
685
0
    jsimd_convsamp_sse2(sample_data, start_col, workspace);
686
0
}
687
688
GLOBAL(void)
689
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
690
                     FAST_FLOAT *workspace)
691
0
{
692
0
  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
693
0
}
694
695
GLOBAL(int)
696
jsimd_can_fdct_islow(void)
697
0
{
698
0
  init_simd();
699
700
  /* The code is optimised for these values only */
701
0
  if (DCTSIZE != 8)
702
0
    return 0;
703
0
  if (sizeof(DCTELEM) != 2)
704
0
    return 0;
705
706
0
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
707
0
    return 1;
708
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
709
0
    return 1;
710
711
0
  return 0;
712
0
}
713
714
GLOBAL(int)
715
jsimd_can_fdct_ifast(void)
716
0
{
717
0
  init_simd();
718
719
  /* The code is optimised for these values only */
720
0
  if (DCTSIZE != 8)
721
0
    return 0;
722
0
  if (sizeof(DCTELEM) != 2)
723
0
    return 0;
724
725
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
726
0
    return 1;
727
728
0
  return 0;
729
0
}
730
731
GLOBAL(int)
732
jsimd_can_fdct_float(void)
733
0
{
734
0
  init_simd();
735
736
  /* The code is optimised for these values only */
737
0
  if (DCTSIZE != 8)
738
0
    return 0;
739
0
  if (sizeof(FAST_FLOAT) != 4)
740
0
    return 0;
741
742
0
  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
743
0
    return 1;
744
745
0
  return 0;
746
0
}
747
748
GLOBAL(void)
749
jsimd_fdct_islow(DCTELEM *data)
750
0
{
751
0
  if (simd_support & JSIMD_AVX2)
752
0
    jsimd_fdct_islow_avx2(data);
753
0
  else
754
0
    jsimd_fdct_islow_sse2(data);
755
0
}
756
757
GLOBAL(void)
758
jsimd_fdct_ifast(DCTELEM *data)
759
0
{
760
0
  jsimd_fdct_ifast_sse2(data);
761
0
}
762
763
GLOBAL(void)
764
jsimd_fdct_float(FAST_FLOAT *data)
765
0
{
766
0
  jsimd_fdct_float_sse(data);
767
0
}
768
769
GLOBAL(int)
770
jsimd_can_quantize(void)
771
0
{
772
0
  init_simd();
773
774
  /* The code is optimised for these values only */
775
0
  if (DCTSIZE != 8)
776
0
    return 0;
777
0
  if (sizeof(JCOEF) != 2)
778
0
    return 0;
779
0
  if (sizeof(DCTELEM) != 2)
780
0
    return 0;
781
782
0
  if (simd_support & JSIMD_AVX2)
783
0
    return 1;
784
0
  if (simd_support & JSIMD_SSE2)
785
0
    return 1;
786
787
0
  return 0;
788
0
}
789
790
GLOBAL(int)
791
jsimd_can_quantize_float(void)
792
0
{
793
0
  init_simd();
794
795
  /* The code is optimised for these values only */
796
0
  if (DCTSIZE != 8)
797
0
    return 0;
798
0
  if (sizeof(JCOEF) != 2)
799
0
    return 0;
800
0
  if (sizeof(FAST_FLOAT) != 4)
801
0
    return 0;
802
803
0
  if (simd_support & JSIMD_SSE2)
804
0
    return 1;
805
806
0
  return 0;
807
0
}
808
809
GLOBAL(void)
810
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
811
0
{
812
0
  if (simd_support & JSIMD_AVX2)
813
0
    jsimd_quantize_avx2(coef_block, divisors, workspace);
814
0
  else
815
0
    jsimd_quantize_sse2(coef_block, divisors, workspace);
816
0
}
817
818
GLOBAL(void)
819
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
820
                     FAST_FLOAT *workspace)
821
0
{
822
0
  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
823
0
}
824
825
GLOBAL(int)
826
jsimd_can_idct_2x2(void)
827
0
{
828
0
  init_simd();
829
830
  /* The code is optimised for these values only */
831
0
  if (DCTSIZE != 8)
832
0
    return 0;
833
0
  if (sizeof(JCOEF) != 2)
834
0
    return 0;
835
0
  if (BITS_IN_JSAMPLE != 8)
836
0
    return 0;
837
0
  if (sizeof(JDIMENSION) != 4)
838
0
    return 0;
839
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
840
0
    return 0;
841
842
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
843
0
    return 1;
844
845
0
  return 0;
846
0
}
847
848
GLOBAL(int)
849
jsimd_can_idct_4x4(void)
850
0
{
851
0
  init_simd();
852
853
  /* The code is optimised for these values only */
854
0
  if (DCTSIZE != 8)
855
0
    return 0;
856
0
  if (sizeof(JCOEF) != 2)
857
0
    return 0;
858
0
  if (BITS_IN_JSAMPLE != 8)
859
0
    return 0;
860
0
  if (sizeof(JDIMENSION) != 4)
861
0
    return 0;
862
0
  if (sizeof(ISLOW_MULT_TYPE) != 2)
863
0
    return 0;
864
865
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
866
0
    return 1;
867
868
0
  return 0;
869
0
}
870
871
GLOBAL(void)
872
jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
873
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
874
               JDIMENSION output_col)
875
0
{
876
0
  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
877
0
}
878
879
GLOBAL(void)
880
jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
881
               JCOEFPTR coef_block, JSAMPARRAY output_buf,
882
               JDIMENSION output_col)
883
0
{
884
0
  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
885
0
}
886
887
GLOBAL(int)
888
jsimd_can_idct_islow(void)
889
59.7k
{
890
59.7k
  init_simd();
891
892
  /* The code is optimised for these values only */
893
59.7k
  if (DCTSIZE != 8)
894
0
    return 0;
895
59.7k
  if (sizeof(JCOEF) != 2)
896
0
    return 0;
897
59.7k
  if (BITS_IN_JSAMPLE != 8)
898
0
    return 0;
899
59.7k
  if (sizeof(JDIMENSION) != 4)
900
0
    return 0;
901
59.7k
  if (sizeof(ISLOW_MULT_TYPE) != 2)
902
0
    return 0;
903
904
59.7k
  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
905
59.7k
    return 1;
906
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
907
0
    return 1;
908
909
0
  return 0;
910
0
}
911
912
GLOBAL(int)
913
jsimd_can_idct_ifast(void)
914
0
{
915
0
  init_simd();
916
917
  /* The code is optimised for these values only */
918
0
  if (DCTSIZE != 8)
919
0
    return 0;
920
0
  if (sizeof(JCOEF) != 2)
921
0
    return 0;
922
0
  if (BITS_IN_JSAMPLE != 8)
923
0
    return 0;
924
0
  if (sizeof(JDIMENSION) != 4)
925
0
    return 0;
926
0
  if (sizeof(IFAST_MULT_TYPE) != 2)
927
0
    return 0;
928
0
  if (IFAST_SCALE_BITS != 2)
929
0
    return 0;
930
931
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
932
0
    return 1;
933
934
0
  return 0;
935
0
}
936
937
GLOBAL(int)
938
jsimd_can_idct_float(void)
939
0
{
940
0
  init_simd();
941
942
0
  if (DCTSIZE != 8)
943
0
    return 0;
944
0
  if (sizeof(JCOEF) != 2)
945
0
    return 0;
946
0
  if (BITS_IN_JSAMPLE != 8)
947
0
    return 0;
948
0
  if (sizeof(JDIMENSION) != 4)
949
0
    return 0;
950
0
  if (sizeof(FAST_FLOAT) != 4)
951
0
    return 0;
952
0
  if (sizeof(FLOAT_MULT_TYPE) != 4)
953
0
    return 0;
954
955
0
  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
956
0
    return 1;
957
958
0
  return 0;
959
0
}
960
961
GLOBAL(void)
962
jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
963
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
964
                 JDIMENSION output_col)
965
187M
{
966
187M
  if (simd_support & JSIMD_AVX2)
967
187M
    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
968
187M
                          output_col);
969
0
  else
970
0
    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
971
0
                          output_col);
972
187M
}
973
974
GLOBAL(void)
975
jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
976
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
977
                 JDIMENSION output_col)
978
0
{
979
0
  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
980
0
                        output_col);
981
0
}
982
983
GLOBAL(void)
984
jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
985
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
986
                 JDIMENSION output_col)
987
0
{
988
0
  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
989
0
                        output_col);
990
0
}
991
992
GLOBAL(int)
993
jsimd_can_huff_encode_one_block(void)
994
0
{
995
0
  init_simd();
996
997
0
  if (DCTSIZE != 8)
998
0
    return 0;
999
0
  if (sizeof(JCOEF) != 2)
1000
0
    return 0;
1001
1002
0
  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1003
0
      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1004
0
    return 1;
1005
1006
0
  return 0;
1007
0
}
1008
1009
/*
 * Unconditionally forwards to jsimd_huff_encode_one_block_sse2() and
 * returns whatever pointer that routine returns.
 */
GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
                            int last_dc_val, c_derived_tbl *dctbl,
                            c_derived_tbl *actbl)
{
  JOCTET *result;

  result = jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
                                            dctbl, actbl);
  return result;
}
1017
1018
GLOBAL(int)
1019
jsimd_can_encode_mcu_AC_first_prepare(void)
1020
0
{
1021
0
  init_simd();
1022
1023
0
  if (DCTSIZE != 8)
1024
0
    return 0;
1025
0
  if (sizeof(JCOEF) != 2)
1026
0
    return 0;
1027
0
  if (simd_support & JSIMD_SSE2)
1028
0
    return 1;
1029
1030
0
  return 0;
1031
0
}
1032
1033
GLOBAL(void)
1034
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1035
                                  const int *jpeg_natural_order_start, int Sl,
1036
                                  int Al, JCOEF *values, size_t *zerobits)
1037
0
{
1038
0
  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1039
0
                                         Sl, Al, values, zerobits);
1040
0
}
1041
1042
GLOBAL(int)
1043
jsimd_can_encode_mcu_AC_refine_prepare(void)
1044
0
{
1045
0
  init_simd();
1046
1047
0
  if (DCTSIZE != 8)
1048
0
    return 0;
1049
0
  if (sizeof(JCOEF) != 2)
1050
0
    return 0;
1051
0
  if (simd_support & JSIMD_SSE2)
1052
0
    return 1;
1053
1054
0
  return 0;
1055
0
}
1056
1057
GLOBAL(int)
1058
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1059
                                   const int *jpeg_natural_order_start, int Sl,
1060
                                   int Al, JCOEF *absvalues, size_t *bits)
1061
0
{
1062
0
  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1063
0
                                                 jpeg_natural_order_start,
1064
0
                                                 Sl, Al, absvalues, bits);
1065
0
}