/src/libjpeg-turbo.main/simd/x86_64/jsimd.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |  * jsimd_x86_64.c | 
| 3 |  |  * | 
| 4 |  |  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 
| 5 |  |  * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander. | 
| 6 |  |  * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. | 
| 7 |  |  * | 
| 8 |  |  * Based on the x86 SIMD extension for IJG JPEG library, | 
| 9 |  |  * Copyright (C) 1999-2006, MIYASAKA Masaru. | 
| 10 |  |  * For conditions of distribution and use, see copyright notice in jsimdext.inc | 
| 11 |  |  * | 
| 12 |  |  * This file contains the interface between the "normal" portions | 
| 13 |  |  * of the library and the SIMD implementations when running on a | 
| 14 |  |  * 64-bit x86 architecture. | 
| 15 |  |  */ | 
| 16 |  |  | 
| 17 |  | #define JPEG_INTERNALS | 
| 18 |  | #include "../../jinclude.h" | 
| 19 |  | #include "../../jpeglib.h" | 
| 20 |  | #include "../../jsimd.h" | 
| 21 |  | #include "../../jdct.h" | 
| 22 |  | #include "../../jsimddct.h" | 
| 23 |  | #include "../jsimd.h" | 
| 24 |  |  | 
| 25 |  | /* | 
| 26 |  |  * In the PIC cases, we have no guarantee that constants will keep | 
| 27 |  |  * their alignment. This macro allows us to verify it at runtime. | 
| 28 |  |  */ | 
/*
 * IS_ALIGNED(ptr, order) is nonzero when the address held by `ptr` is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that
 * expression arguments (e.g. `base + 4`) are cast and shifted as a whole,
 * and the mask is built in size_t to match the width of the address.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
| 33 |  |  | 
| 34 |  | static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0); | 
| 35 |  | static THREAD_LOCAL unsigned int simd_huffman = 1; | 
| 36 |  |  | 
| 37 |  | /* | 
| 38 |  |  * Check what SIMD accelerations are supported. | 
| 39 |  |  */ | 
| 40 |  | LOCAL(void) | 
| 41 |  | init_simd(void) | 
| 42 | 48.4k | { | 
| 43 | 48.4k | #ifndef NO_GETENV | 
| 44 | 48.4k |   char env[2] = { 0 }; | 
| 45 | 48.4k | #endif | 
| 46 |  |  | 
| 47 | 48.4k |   if (simd_support != ~0U) | 
| 48 | 48.4k |     return; | 
| 49 |  |  | 
| 50 | 1 |   simd_support = jpeg_simd_cpu_support(); | 
| 51 |  |  | 
| 52 | 1 | #ifndef NO_GETENV | 
| 53 |  |   /* Force different settings through environment variables */ | 
| 54 | 1 |   if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1")) | 
| 55 | 0 |     simd_support &= JSIMD_SSE2; | 
| 56 | 1 |   if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1")) | 
| 57 | 0 |     simd_support &= JSIMD_AVX2; | 
| 58 | 1 |   if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1")) | 
| 59 | 0 |     simd_support = 0; | 
| 60 | 1 |   if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1")) | 
| 61 | 0 |     simd_huffman = 0; | 
| 62 | 1 | #endif | 
| 63 | 1 | } | 
| 64 |  |  | 
| 65 |  | GLOBAL(int) | 
| 66 |  | jsimd_can_rgb_ycc(void) | 
| 67 | 0 | { | 
| 68 | 0 |   init_simd(); | 
| 69 |  |  | 
| 70 |  |   /* The code is optimised for these values only */ | 
| 71 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 72 | 0 |     return 0; | 
| 73 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 74 | 0 |     return 0; | 
| 75 | 0 |   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 
| 76 | 0 |     return 0; | 
| 77 |  |  | 
| 78 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 79 | 0 |       IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2)) | 
| 80 | 0 |     return 1; | 
| 81 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 82 | 0 |       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 
| 83 | 0 |     return 1; | 
| 84 |  |  | 
| 85 | 0 |   return 0; | 
| 86 | 0 | } | 
| 87 |  |  | 
| 88 |  | GLOBAL(int) | 
| 89 |  | jsimd_can_rgb_gray(void) | 
| 90 | 0 | { | 
| 91 | 0 |   init_simd(); | 
| 92 |  |  | 
| 93 |  |   /* The code is optimised for these values only */ | 
| 94 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 95 | 0 |     return 0; | 
| 96 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 97 | 0 |     return 0; | 
| 98 | 0 |   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 
| 99 | 0 |     return 0; | 
| 100 |  |  | 
| 101 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 102 | 0 |       IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2)) | 
| 103 | 0 |     return 1; | 
| 104 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 105 | 0 |       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) | 
| 106 | 0 |     return 1; | 
| 107 |  |  | 
| 108 | 0 |   return 0; | 
| 109 | 0 | } | 
| 110 |  |  | 
| 111 |  | GLOBAL(int) | 
| 112 |  | jsimd_can_ycc_rgb(void) | 
| 113 | 0 | { | 
| 114 | 0 |   init_simd(); | 
| 115 |  |  | 
| 116 |  |   /* The code is optimised for these values only */ | 
| 117 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 118 | 0 |     return 0; | 
| 119 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 120 | 0 |     return 0; | 
| 121 | 0 |   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 
| 122 | 0 |     return 0; | 
| 123 |  |  | 
| 124 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 125 | 0 |       IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2)) | 
| 126 | 0 |     return 1; | 
| 127 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 128 | 0 |       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 
| 129 | 0 |     return 1; | 
| 130 |  |  | 
| 131 | 0 |   return 0; | 
| 132 | 0 | } | 
| 133 |  |  | 
| 134 |  | GLOBAL(int) | 
| 135 |  | jsimd_can_ycc_rgb565(void) | 
| 136 | 0 | { | 
| 137 | 0 |   return 0; | 
| 138 | 0 | } | 
| 139 |  |  | 
| 140 |  | GLOBAL(void) | 
| 141 |  | jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, | 
| 142 |  |                       JSAMPIMAGE output_buf, JDIMENSION output_row, | 
| 143 |  |                       int num_rows) | 
| 144 | 0 | { | 
| 145 | 0 |   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 
| 146 | 0 |   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 
| 147 |  | 
 | 
| 148 | 0 |   if (simd_support == ~0U) | 
| 149 | 0 |     init_simd(); | 
| 150 |  | 
 | 
| 151 | 0 |   switch (cinfo->in_color_space) { | 
| 152 | 0 |   case JCS_EXT_RGB: | 
| 153 | 0 |     avx2fct = jsimd_extrgb_ycc_convert_avx2; | 
| 154 | 0 |     sse2fct = jsimd_extrgb_ycc_convert_sse2; | 
| 155 | 0 |     break; | 
| 156 | 0 |   case JCS_EXT_RGBX: | 
| 157 | 0 |   case JCS_EXT_RGBA: | 
| 158 | 0 |     avx2fct = jsimd_extrgbx_ycc_convert_avx2; | 
| 159 | 0 |     sse2fct = jsimd_extrgbx_ycc_convert_sse2; | 
| 160 | 0 |     break; | 
| 161 | 0 |   case JCS_EXT_BGR: | 
| 162 | 0 |     avx2fct = jsimd_extbgr_ycc_convert_avx2; | 
| 163 | 0 |     sse2fct = jsimd_extbgr_ycc_convert_sse2; | 
| 164 | 0 |     break; | 
| 165 | 0 |   case JCS_EXT_BGRX: | 
| 166 | 0 |   case JCS_EXT_BGRA: | 
| 167 | 0 |     avx2fct = jsimd_extbgrx_ycc_convert_avx2; | 
| 168 | 0 |     sse2fct = jsimd_extbgrx_ycc_convert_sse2; | 
| 169 | 0 |     break; | 
| 170 | 0 |   case JCS_EXT_XBGR: | 
| 171 | 0 |   case JCS_EXT_ABGR: | 
| 172 | 0 |     avx2fct = jsimd_extxbgr_ycc_convert_avx2; | 
| 173 | 0 |     sse2fct = jsimd_extxbgr_ycc_convert_sse2; | 
| 174 | 0 |     break; | 
| 175 | 0 |   case JCS_EXT_XRGB: | 
| 176 | 0 |   case JCS_EXT_ARGB: | 
| 177 | 0 |     avx2fct = jsimd_extxrgb_ycc_convert_avx2; | 
| 178 | 0 |     sse2fct = jsimd_extxrgb_ycc_convert_sse2; | 
| 179 | 0 |     break; | 
| 180 | 0 |   default: | 
| 181 | 0 |     avx2fct = jsimd_rgb_ycc_convert_avx2; | 
| 182 | 0 |     sse2fct = jsimd_rgb_ycc_convert_sse2; | 
| 183 | 0 |     break; | 
| 184 | 0 |   } | 
| 185 |  |  | 
| 186 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 187 | 0 |     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 
| 188 | 0 |   else | 
| 189 | 0 |     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 
| 190 | 0 | } | 
| 191 |  |  | 
| 192 |  | GLOBAL(void) | 
| 193 |  | jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, | 
| 194 |  |                        JSAMPIMAGE output_buf, JDIMENSION output_row, | 
| 195 |  |                        int num_rows) | 
| 196 | 0 | { | 
| 197 | 0 |   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 
| 198 | 0 |   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 
| 199 |  | 
 | 
| 200 | 0 |   if (simd_support == ~0U) | 
| 201 | 0 |     init_simd(); | 
| 202 |  | 
 | 
| 203 | 0 |   switch (cinfo->in_color_space) { | 
| 204 | 0 |   case JCS_EXT_RGB: | 
| 205 | 0 |     avx2fct = jsimd_extrgb_gray_convert_avx2; | 
| 206 | 0 |     sse2fct = jsimd_extrgb_gray_convert_sse2; | 
| 207 | 0 |     break; | 
| 208 | 0 |   case JCS_EXT_RGBX: | 
| 209 | 0 |   case JCS_EXT_RGBA: | 
| 210 | 0 |     avx2fct = jsimd_extrgbx_gray_convert_avx2; | 
| 211 | 0 |     sse2fct = jsimd_extrgbx_gray_convert_sse2; | 
| 212 | 0 |     break; | 
| 213 | 0 |   case JCS_EXT_BGR: | 
| 214 | 0 |     avx2fct = jsimd_extbgr_gray_convert_avx2; | 
| 215 | 0 |     sse2fct = jsimd_extbgr_gray_convert_sse2; | 
| 216 | 0 |     break; | 
| 217 | 0 |   case JCS_EXT_BGRX: | 
| 218 | 0 |   case JCS_EXT_BGRA: | 
| 219 | 0 |     avx2fct = jsimd_extbgrx_gray_convert_avx2; | 
| 220 | 0 |     sse2fct = jsimd_extbgrx_gray_convert_sse2; | 
| 221 | 0 |     break; | 
| 222 | 0 |   case JCS_EXT_XBGR: | 
| 223 | 0 |   case JCS_EXT_ABGR: | 
| 224 | 0 |     avx2fct = jsimd_extxbgr_gray_convert_avx2; | 
| 225 | 0 |     sse2fct = jsimd_extxbgr_gray_convert_sse2; | 
| 226 | 0 |     break; | 
| 227 | 0 |   case JCS_EXT_XRGB: | 
| 228 | 0 |   case JCS_EXT_ARGB: | 
| 229 | 0 |     avx2fct = jsimd_extxrgb_gray_convert_avx2; | 
| 230 | 0 |     sse2fct = jsimd_extxrgb_gray_convert_sse2; | 
| 231 | 0 |     break; | 
| 232 | 0 |   default: | 
| 233 | 0 |     avx2fct = jsimd_rgb_gray_convert_avx2; | 
| 234 | 0 |     sse2fct = jsimd_rgb_gray_convert_sse2; | 
| 235 | 0 |     break; | 
| 236 | 0 |   } | 
| 237 |  |  | 
| 238 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 239 | 0 |     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 
| 240 | 0 |   else | 
| 241 | 0 |     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 
| 242 | 0 | } | 
| 243 |  |  | 
| 244 |  | GLOBAL(void) | 
| 245 |  | jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, | 
| 246 |  |                       JDIMENSION input_row, JSAMPARRAY output_buf, | 
| 247 |  |                       int num_rows) | 
| 248 | 0 | { | 
| 249 | 0 |   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 
| 250 | 0 |   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 
| 251 |  | 
 | 
| 252 | 0 |   if (simd_support == ~0U) | 
| 253 | 0 |     init_simd(); | 
| 254 |  | 
 | 
| 255 | 0 |   switch (cinfo->out_color_space) { | 
| 256 | 0 |   case JCS_EXT_RGB: | 
| 257 | 0 |     avx2fct = jsimd_ycc_extrgb_convert_avx2; | 
| 258 | 0 |     sse2fct = jsimd_ycc_extrgb_convert_sse2; | 
| 259 | 0 |     break; | 
| 260 | 0 |   case JCS_EXT_RGBX: | 
| 261 | 0 |   case JCS_EXT_RGBA: | 
| 262 | 0 |     avx2fct = jsimd_ycc_extrgbx_convert_avx2; | 
| 263 | 0 |     sse2fct = jsimd_ycc_extrgbx_convert_sse2; | 
| 264 | 0 |     break; | 
| 265 | 0 |   case JCS_EXT_BGR: | 
| 266 | 0 |     avx2fct = jsimd_ycc_extbgr_convert_avx2; | 
| 267 | 0 |     sse2fct = jsimd_ycc_extbgr_convert_sse2; | 
| 268 | 0 |     break; | 
| 269 | 0 |   case JCS_EXT_BGRX: | 
| 270 | 0 |   case JCS_EXT_BGRA: | 
| 271 | 0 |     avx2fct = jsimd_ycc_extbgrx_convert_avx2; | 
| 272 | 0 |     sse2fct = jsimd_ycc_extbgrx_convert_sse2; | 
| 273 | 0 |     break; | 
| 274 | 0 |   case JCS_EXT_XBGR: | 
| 275 | 0 |   case JCS_EXT_ABGR: | 
| 276 | 0 |     avx2fct = jsimd_ycc_extxbgr_convert_avx2; | 
| 277 | 0 |     sse2fct = jsimd_ycc_extxbgr_convert_sse2; | 
| 278 | 0 |     break; | 
| 279 | 0 |   case JCS_EXT_XRGB: | 
| 280 | 0 |   case JCS_EXT_ARGB: | 
| 281 | 0 |     avx2fct = jsimd_ycc_extxrgb_convert_avx2; | 
| 282 | 0 |     sse2fct = jsimd_ycc_extxrgb_convert_sse2; | 
| 283 | 0 |     break; | 
| 284 | 0 |   default: | 
| 285 | 0 |     avx2fct = jsimd_ycc_rgb_convert_avx2; | 
| 286 | 0 |     sse2fct = jsimd_ycc_rgb_convert_sse2; | 
| 287 | 0 |     break; | 
| 288 | 0 |   } | 
| 289 |  |  | 
| 290 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 291 | 0 |     avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 
| 292 | 0 |   else | 
| 293 | 0 |     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 
| 294 | 0 | } | 
| 295 |  |  | 
| 296 |  | GLOBAL(void) | 
| 297 |  | jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, | 
| 298 |  |                          JDIMENSION input_row, JSAMPARRAY output_buf, | 
| 299 |  |                          int num_rows) | 
| 300 | 0 | { | 
| 301 | 0 | } | 
| 302 |  |  | 
| 303 |  | GLOBAL(int) | 
| 304 |  | jsimd_can_h2v2_downsample(void) | 
| 305 | 0 | { | 
| 306 | 0 |   init_simd(); | 
| 307 |  |  | 
| 308 |  |   /* The code is optimised for these values only */ | 
| 309 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 310 | 0 |     return 0; | 
| 311 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 312 | 0 |     return 0; | 
| 313 |  |  | 
| 314 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 315 | 0 |     return 1; | 
| 316 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 317 | 0 |     return 1; | 
| 318 |  |  | 
| 319 | 0 |   return 0; | 
| 320 | 0 | } | 
| 321 |  |  | 
| 322 |  | GLOBAL(int) | 
| 323 |  | jsimd_can_h2v1_downsample(void) | 
| 324 | 0 | { | 
| 325 | 0 |   init_simd(); | 
| 326 |  |  | 
| 327 |  |   /* The code is optimised for these values only */ | 
| 328 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 329 | 0 |     return 0; | 
| 330 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 331 | 0 |     return 0; | 
| 332 |  |  | 
| 333 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 334 | 0 |     return 1; | 
| 335 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 336 | 0 |     return 1; | 
| 337 |  |  | 
| 338 | 0 |   return 0; | 
| 339 | 0 | } | 
| 340 |  |  | 
| 341 |  | GLOBAL(void) | 
| 342 |  | jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, | 
| 343 |  |                       JSAMPARRAY input_data, JSAMPARRAY output_data) | 
| 344 | 0 | { | 
| 345 | 0 |   if (simd_support == ~0U) | 
| 346 | 0 |     init_simd(); | 
| 347 |  | 
 | 
| 348 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 349 | 0 |     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, | 
| 350 | 0 |                                compptr->v_samp_factor, | 
| 351 | 0 |                                compptr->width_in_blocks, input_data, | 
| 352 | 0 |                                output_data); | 
| 353 | 0 |   else | 
| 354 | 0 |     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, | 
| 355 | 0 |                                compptr->v_samp_factor, | 
| 356 | 0 |                                compptr->width_in_blocks, input_data, | 
| 357 | 0 |                                output_data); | 
| 358 | 0 | } | 
| 359 |  |  | 
| 360 |  | GLOBAL(void) | 
| 361 |  | jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, | 
| 362 |  |                       JSAMPARRAY input_data, JSAMPARRAY output_data) | 
| 363 | 0 | { | 
| 364 | 0 |   if (simd_support == ~0U) | 
| 365 | 0 |     init_simd(); | 
| 366 |  | 
 | 
| 367 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 368 | 0 |     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, | 
| 369 | 0 |                                compptr->v_samp_factor, | 
| 370 | 0 |                                compptr->width_in_blocks, input_data, | 
| 371 | 0 |                                output_data); | 
| 372 | 0 |   else | 
| 373 | 0 |     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, | 
| 374 | 0 |                                compptr->v_samp_factor, | 
| 375 | 0 |                                compptr->width_in_blocks, input_data, | 
| 376 | 0 |                                output_data); | 
| 377 | 0 | } | 
| 378 |  |  | 
| 379 |  | GLOBAL(int) | 
| 380 |  | jsimd_can_h2v2_upsample(void) | 
| 381 | 0 | { | 
| 382 | 0 |   init_simd(); | 
| 383 |  |  | 
| 384 |  |   /* The code is optimised for these values only */ | 
| 385 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 386 | 0 |     return 0; | 
| 387 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 388 | 0 |     return 0; | 
| 389 |  |  | 
| 390 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 391 | 0 |     return 1; | 
| 392 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 393 | 0 |     return 1; | 
| 394 |  |  | 
| 395 | 0 |   return 0; | 
| 396 | 0 | } | 
| 397 |  |  | 
| 398 |  | GLOBAL(int) | 
| 399 |  | jsimd_can_h2v1_upsample(void) | 
| 400 | 0 | { | 
| 401 | 0 |   init_simd(); | 
| 402 |  |  | 
| 403 |  |   /* The code is optimised for these values only */ | 
| 404 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 405 | 0 |     return 0; | 
| 406 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 407 | 0 |     return 0; | 
| 408 |  |  | 
| 409 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 410 | 0 |     return 1; | 
| 411 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 412 | 0 |     return 1; | 
| 413 |  |  | 
| 414 | 0 |   return 0; | 
| 415 | 0 | } | 
| 416 |  |  | 
| 417 |  | GLOBAL(void) | 
| 418 |  | jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 419 |  |                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) | 
| 420 | 0 | { | 
| 421 | 0 |   if (simd_support == ~0U) | 
| 422 | 0 |     init_simd(); | 
| 423 |  | 
 | 
| 424 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 425 | 0 |     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, | 
| 426 | 0 |                              input_data, output_data_ptr); | 
| 427 | 0 |   else | 
| 428 | 0 |     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, | 
| 429 | 0 |                              input_data, output_data_ptr); | 
| 430 | 0 | } | 
| 431 |  |  | 
| 432 |  | GLOBAL(void) | 
| 433 |  | jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 434 |  |                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) | 
| 435 | 0 | { | 
| 436 | 0 |   if (simd_support == ~0U) | 
| 437 | 0 |     init_simd(); | 
| 438 |  | 
 | 
| 439 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 440 | 0 |     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, | 
| 441 | 0 |                              input_data, output_data_ptr); | 
| 442 | 0 |   else | 
| 443 | 0 |     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, | 
| 444 | 0 |                              input_data, output_data_ptr); | 
| 445 | 0 | } | 
| 446 |  |  | 
| 447 |  | GLOBAL(int) | 
| 448 |  | jsimd_can_h2v2_fancy_upsample(void) | 
| 449 | 0 | { | 
| 450 | 0 |   init_simd(); | 
| 451 |  |  | 
| 452 |  |   /* The code is optimised for these values only */ | 
| 453 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 454 | 0 |     return 0; | 
| 455 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 456 | 0 |     return 0; | 
| 457 |  |  | 
| 458 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 459 | 0 |       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) | 
| 460 | 0 |     return 1; | 
| 461 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 462 | 0 |       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 
| 463 | 0 |     return 1; | 
| 464 |  |  | 
| 465 | 0 |   return 0; | 
| 466 | 0 | } | 
| 467 |  |  | 
| 468 |  | GLOBAL(int) | 
| 469 |  | jsimd_can_h2v1_fancy_upsample(void) | 
| 470 | 0 | { | 
| 471 | 0 |   init_simd(); | 
| 472 |  |  | 
| 473 |  |   /* The code is optimised for these values only */ | 
| 474 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 475 | 0 |     return 0; | 
| 476 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 477 | 0 |     return 0; | 
| 478 |  |  | 
| 479 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 480 | 0 |       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) | 
| 481 | 0 |     return 1; | 
| 482 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 483 | 0 |       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 
| 484 | 0 |     return 1; | 
| 485 |  |  | 
| 486 | 0 |   return 0; | 
| 487 | 0 | } | 
| 488 |  |  | 
| 489 |  | GLOBAL(void) | 
| 490 |  | jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 491 |  |                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) | 
| 492 | 0 | { | 
| 493 | 0 |   if (simd_support == ~0U) | 
| 494 | 0 |     init_simd(); | 
| 495 |  | 
 | 
| 496 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 497 | 0 |     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, | 
| 498 | 0 |                                    compptr->downsampled_width, input_data, | 
| 499 | 0 |                                    output_data_ptr); | 
| 500 | 0 |   else | 
| 501 | 0 |     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 
| 502 | 0 |                                    compptr->downsampled_width, input_data, | 
| 503 | 0 |                                    output_data_ptr); | 
| 504 | 0 | } | 
| 505 |  |  | 
| 506 |  | GLOBAL(void) | 
| 507 |  | jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 508 |  |                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) | 
| 509 | 0 | { | 
| 510 | 0 |   if (simd_support == ~0U) | 
| 511 | 0 |     init_simd(); | 
| 512 |  | 
 | 
| 513 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 514 | 0 |     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, | 
| 515 | 0 |                                    compptr->downsampled_width, input_data, | 
| 516 | 0 |                                    output_data_ptr); | 
| 517 | 0 |   else | 
| 518 | 0 |     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 
| 519 | 0 |                                    compptr->downsampled_width, input_data, | 
| 520 | 0 |                                    output_data_ptr); | 
| 521 | 0 | } | 
| 522 |  |  | 
| 523 |  | GLOBAL(int) | 
| 524 |  | jsimd_can_h2v2_merged_upsample(void) | 
| 525 | 0 | { | 
| 526 | 0 |   init_simd(); | 
| 527 |  |  | 
| 528 |  |   /* The code is optimised for these values only */ | 
| 529 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 530 | 0 |     return 0; | 
| 531 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 532 | 0 |     return 0; | 
| 533 |  |  | 
| 534 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 535 | 0 |       IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) | 
| 536 | 0 |     return 1; | 
| 537 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 538 | 0 |       IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 
| 539 | 0 |     return 1; | 
| 540 |  |  | 
| 541 | 0 |   return 0; | 
| 542 | 0 | } | 
| 543 |  |  | 
| 544 |  | GLOBAL(int) | 
| 545 |  | jsimd_can_h2v1_merged_upsample(void) | 
| 546 | 0 | { | 
| 547 | 0 |   init_simd(); | 
| 548 |  |  | 
| 549 |  |   /* The code is optimised for these values only */ | 
| 550 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 551 | 0 |     return 0; | 
| 552 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 553 | 0 |     return 0; | 
| 554 |  |  | 
| 555 | 0 |   if ((simd_support & JSIMD_AVX2) && | 
| 556 | 0 |       IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) | 
| 557 | 0 |     return 1; | 
| 558 | 0 |   if ((simd_support & JSIMD_SSE2) && | 
| 559 | 0 |       IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 
| 560 | 0 |     return 1; | 
| 561 |  |  | 
| 562 | 0 |   return 0; | 
| 563 | 0 | } | 
| 564 |  |  | 
| 565 |  | GLOBAL(void) | 
| 566 |  | jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, | 
| 567 |  |                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) | 
| 568 | 0 | { | 
| 569 | 0 |   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 
| 570 | 0 |   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 
| 571 |  | 
 | 
| 572 | 0 |   if (simd_support == ~0U) | 
| 573 | 0 |     init_simd(); | 
| 574 |  | 
 | 
| 575 | 0 |   switch (cinfo->out_color_space) { | 
| 576 | 0 |   case JCS_EXT_RGB: | 
| 577 | 0 |     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; | 
| 578 | 0 |     sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2; | 
| 579 | 0 |     break; | 
| 580 | 0 |   case JCS_EXT_RGBX: | 
| 581 | 0 |   case JCS_EXT_RGBA: | 
| 582 | 0 |     avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2; | 
| 583 | 0 |     sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2; | 
| 584 | 0 |     break; | 
| 585 | 0 |   case JCS_EXT_BGR: | 
| 586 | 0 |     avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2; | 
| 587 | 0 |     sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2; | 
| 588 | 0 |     break; | 
| 589 | 0 |   case JCS_EXT_BGRX: | 
| 590 | 0 |   case JCS_EXT_BGRA: | 
| 591 | 0 |     avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2; | 
| 592 | 0 |     sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2; | 
| 593 | 0 |     break; | 
| 594 | 0 |   case JCS_EXT_XBGR: | 
| 595 | 0 |   case JCS_EXT_ABGR: | 
| 596 | 0 |     avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2; | 
| 597 | 0 |     sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2; | 
| 598 | 0 |     break; | 
| 599 | 0 |   case JCS_EXT_XRGB: | 
| 600 | 0 |   case JCS_EXT_ARGB: | 
| 601 | 0 |     avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2; | 
| 602 | 0 |     sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2; | 
| 603 | 0 |     break; | 
| 604 | 0 |   default: | 
| 605 | 0 |     avx2fct = jsimd_h2v2_merged_upsample_avx2; | 
| 606 | 0 |     sse2fct = jsimd_h2v2_merged_upsample_sse2; | 
| 607 | 0 |     break; | 
| 608 | 0 |   } | 
| 609 |  |  | 
| 610 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 611 | 0 |     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 
| 612 | 0 |   else | 
| 613 | 0 |     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 
| 614 | 0 | } | 
| 615 |  |  | 
| 616 |  | GLOBAL(void) | 
| 617 |  | jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, | 
| 618 |  |                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) | 
| 619 | 0 | { | 
| 620 | 0 |   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 
| 621 | 0 |   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 
| 622 |  | 
 | 
| 623 | 0 |   if (simd_support == ~0U) | 
| 624 | 0 |     init_simd(); | 
| 625 |  | 
 | 
| 626 | 0 |   switch (cinfo->out_color_space) { | 
| 627 | 0 |   case JCS_EXT_RGB: | 
| 628 | 0 |     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; | 
| 629 | 0 |     sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2; | 
| 630 | 0 |     break; | 
| 631 | 0 |   case JCS_EXT_RGBX: | 
| 632 | 0 |   case JCS_EXT_RGBA: | 
| 633 | 0 |     avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2; | 
| 634 | 0 |     sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2; | 
| 635 | 0 |     break; | 
| 636 | 0 |   case JCS_EXT_BGR: | 
| 637 | 0 |     avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2; | 
| 638 | 0 |     sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2; | 
| 639 | 0 |     break; | 
| 640 | 0 |   case JCS_EXT_BGRX: | 
| 641 | 0 |   case JCS_EXT_BGRA: | 
| 642 | 0 |     avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2; | 
| 643 | 0 |     sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2; | 
| 644 | 0 |     break; | 
| 645 | 0 |   case JCS_EXT_XBGR: | 
| 646 | 0 |   case JCS_EXT_ABGR: | 
| 647 | 0 |     avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2; | 
| 648 | 0 |     sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2; | 
| 649 | 0 |     break; | 
| 650 | 0 |   case JCS_EXT_XRGB: | 
| 651 | 0 |   case JCS_EXT_ARGB: | 
| 652 | 0 |     avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2; | 
| 653 | 0 |     sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2; | 
| 654 | 0 |     break; | 
| 655 | 0 |   default: | 
| 656 | 0 |     avx2fct = jsimd_h2v1_merged_upsample_avx2; | 
| 657 | 0 |     sse2fct = jsimd_h2v1_merged_upsample_sse2; | 
| 658 | 0 |     break; | 
| 659 | 0 |   } | 
| 660 |  |  | 
| 661 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 662 | 0 |     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 
| 663 | 0 |   else | 
| 664 | 0 |     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 
| 665 | 0 | } | 
| 666 |  |  | 
| 667 |  | GLOBAL(int) | 
| 668 |  | jsimd_can_convsamp(void) | 
| 669 | 0 | { | 
| 670 | 0 |   init_simd(); | 
| 671 |  |  | 
| 672 |  |   /* The code is optimised for these values only */ | 
| 673 | 0 |   if (DCTSIZE != 8) | 
| 674 | 0 |     return 0; | 
| 675 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 676 | 0 |     return 0; | 
| 677 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 678 | 0 |     return 0; | 
| 679 | 0 |   if (sizeof(DCTELEM) != 2) | 
| 680 | 0 |     return 0; | 
| 681 |  |  | 
| 682 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 683 | 0 |     return 1; | 
| 684 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 685 | 0 |     return 1; | 
| 686 |  |  | 
| 687 | 0 |   return 0; | 
| 688 | 0 | } | 
| 689 |  |  | 
| 690 |  | GLOBAL(int) | 
| 691 |  | jsimd_can_convsamp_float(void) | 
| 692 | 0 | { | 
| 693 | 0 |   init_simd(); | 
| 694 |  |  | 
| 695 |  |   /* The code is optimised for these values only */ | 
| 696 | 0 |   if (DCTSIZE != 8) | 
| 697 | 0 |     return 0; | 
| 698 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 699 | 0 |     return 0; | 
| 700 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 701 | 0 |     return 0; | 
| 702 | 0 |   if (sizeof(FAST_FLOAT) != 4) | 
| 703 | 0 |     return 0; | 
| 704 |  |  | 
| 705 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 706 | 0 |     return 1; | 
| 707 |  |  | 
| 708 | 0 |   return 0; | 
| 709 | 0 | } | 
| 710 |  |  | 
| 711 |  | GLOBAL(void) | 
| 712 |  | jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, | 
| 713 |  |                DCTELEM *workspace) | 
| 714 | 0 | { | 
| 715 | 0 |   if (simd_support == ~0U) | 
| 716 | 0 |     init_simd(); | 
| 717 |  | 
 | 
| 718 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 719 | 0 |     jsimd_convsamp_avx2(sample_data, start_col, workspace); | 
| 720 | 0 |   else | 
| 721 | 0 |     jsimd_convsamp_sse2(sample_data, start_col, workspace); | 
| 722 | 0 | } | 
| 723 |  |  | 
| 724 |  | GLOBAL(void) | 
| 725 |  | jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, | 
| 726 |  |                      FAST_FLOAT *workspace) | 
| 727 | 0 | { | 
| 728 | 0 |   jsimd_convsamp_float_sse2(sample_data, start_col, workspace); | 
| 729 | 0 | } | 
| 730 |  |  | 
| 731 |  | GLOBAL(int) | 
| 732 |  | jsimd_can_fdct_islow(void) | 
| 733 | 0 | { | 
| 734 | 0 |   init_simd(); | 
| 735 |  |  | 
| 736 |  |   /* The code is optimised for these values only */ | 
| 737 | 0 |   if (DCTSIZE != 8) | 
| 738 | 0 |     return 0; | 
| 739 | 0 |   if (sizeof(DCTELEM) != 2) | 
| 740 | 0 |     return 0; | 
| 741 |  |  | 
| 742 | 0 |   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2)) | 
| 743 | 0 |     return 1; | 
| 744 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) | 
| 745 | 0 |     return 1; | 
| 746 |  |  | 
| 747 | 0 |   return 0; | 
| 748 | 0 | } | 
| 749 |  |  | 
| 750 |  | GLOBAL(int) | 
| 751 |  | jsimd_can_fdct_ifast(void) | 
| 752 | 0 | { | 
| 753 | 0 |   init_simd(); | 
| 754 |  |  | 
| 755 |  |   /* The code is optimised for these values only */ | 
| 756 | 0 |   if (DCTSIZE != 8) | 
| 757 | 0 |     return 0; | 
| 758 | 0 |   if (sizeof(DCTELEM) != 2) | 
| 759 | 0 |     return 0; | 
| 760 |  |  | 
| 761 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) | 
| 762 | 0 |     return 1; | 
| 763 |  |  | 
| 764 | 0 |   return 0; | 
| 765 | 0 | } | 
| 766 |  |  | 
| 767 |  | GLOBAL(int) | 
| 768 |  | jsimd_can_fdct_float(void) | 
| 769 | 0 | { | 
| 770 | 0 |   init_simd(); | 
| 771 |  |  | 
| 772 |  |   /* The code is optimised for these values only */ | 
| 773 | 0 |   if (DCTSIZE != 8) | 
| 774 | 0 |     return 0; | 
| 775 | 0 |   if (sizeof(FAST_FLOAT) != 4) | 
| 776 | 0 |     return 0; | 
| 777 |  |  | 
| 778 | 0 |   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) | 
| 779 | 0 |     return 1; | 
| 780 |  |  | 
| 781 | 0 |   return 0; | 
| 782 | 0 | } | 
| 783 |  |  | 
| 784 |  | GLOBAL(void) | 
| 785 |  | jsimd_fdct_islow(DCTELEM *data) | 
| 786 | 0 | { | 
| 787 | 0 |   if (simd_support == ~0U) | 
| 788 | 0 |     init_simd(); | 
| 789 |  | 
 | 
| 790 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 791 | 0 |     jsimd_fdct_islow_avx2(data); | 
| 792 | 0 |   else | 
| 793 | 0 |     jsimd_fdct_islow_sse2(data); | 
| 794 | 0 | } | 
| 795 |  |  | 
| 796 |  | GLOBAL(void) | 
| 797 |  | jsimd_fdct_ifast(DCTELEM *data) | 
| 798 | 0 | { | 
| 799 | 0 |   jsimd_fdct_ifast_sse2(data); | 
| 800 | 0 | } | 
| 801 |  |  | 
| 802 |  | GLOBAL(void) | 
| 803 |  | jsimd_fdct_float(FAST_FLOAT *data) | 
| 804 | 0 | { | 
| 805 | 0 |   jsimd_fdct_float_sse(data); | 
| 806 | 0 | } | 
| 807 |  |  | 
| 808 |  | GLOBAL(int) | 
| 809 |  | jsimd_can_quantize(void) | 
| 810 | 0 | { | 
| 811 | 0 |   init_simd(); | 
| 812 |  |  | 
| 813 |  |   /* The code is optimised for these values only */ | 
| 814 | 0 |   if (DCTSIZE != 8) | 
| 815 | 0 |     return 0; | 
| 816 | 0 |   if (sizeof(JCOEF) != 2) | 
| 817 | 0 |     return 0; | 
| 818 | 0 |   if (sizeof(DCTELEM) != 2) | 
| 819 | 0 |     return 0; | 
| 820 |  |  | 
| 821 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 822 | 0 |     return 1; | 
| 823 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 824 | 0 |     return 1; | 
| 825 |  |  | 
| 826 | 0 |   return 0; | 
| 827 | 0 | } | 
| 828 |  |  | 
| 829 |  | GLOBAL(int) | 
| 830 |  | jsimd_can_quantize_float(void) | 
| 831 | 0 | { | 
| 832 | 0 |   init_simd(); | 
| 833 |  |  | 
| 834 |  |   /* The code is optimised for these values only */ | 
| 835 | 0 |   if (DCTSIZE != 8) | 
| 836 | 0 |     return 0; | 
| 837 | 0 |   if (sizeof(JCOEF) != 2) | 
| 838 | 0 |     return 0; | 
| 839 | 0 |   if (sizeof(FAST_FLOAT) != 4) | 
| 840 | 0 |     return 0; | 
| 841 |  |  | 
| 842 | 0 |   if (simd_support & JSIMD_SSE2) | 
| 843 | 0 |     return 1; | 
| 844 |  |  | 
| 845 | 0 |   return 0; | 
| 846 | 0 | } | 
| 847 |  |  | 
| 848 |  | GLOBAL(void) | 
| 849 |  | jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) | 
| 850 | 0 | { | 
| 851 | 0 |   if (simd_support == ~0U) | 
| 852 | 0 |     init_simd(); | 
| 853 |  | 
 | 
| 854 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 855 | 0 |     jsimd_quantize_avx2(coef_block, divisors, workspace); | 
| 856 | 0 |   else | 
| 857 | 0 |     jsimd_quantize_sse2(coef_block, divisors, workspace); | 
| 858 | 0 | } | 
| 859 |  |  | 
| 860 |  | GLOBAL(void) | 
| 861 |  | jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, | 
| 862 |  |                      FAST_FLOAT *workspace) | 
| 863 | 0 | { | 
| 864 | 0 |   jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 
| 865 | 0 | } | 
| 866 |  |  | 
| 867 |  | GLOBAL(int) | 
| 868 |  | jsimd_can_idct_2x2(void) | 
| 869 | 0 | { | 
| 870 | 0 |   init_simd(); | 
| 871 |  |  | 
| 872 |  |   /* The code is optimised for these values only */ | 
| 873 | 0 |   if (DCTSIZE != 8) | 
| 874 | 0 |     return 0; | 
| 875 | 0 |   if (sizeof(JCOEF) != 2) | 
| 876 | 0 |     return 0; | 
| 877 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 878 | 0 |     return 0; | 
| 879 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 880 | 0 |     return 0; | 
| 881 | 0 |   if (sizeof(ISLOW_MULT_TYPE) != 2) | 
| 882 | 0 |     return 0; | 
| 883 |  |  | 
| 884 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 
| 885 | 0 |     return 1; | 
| 886 |  |  | 
| 887 | 0 |   return 0; | 
| 888 | 0 | } | 
| 889 |  |  | 
| 890 |  | GLOBAL(int) | 
| 891 |  | jsimd_can_idct_4x4(void) | 
| 892 | 0 | { | 
| 893 | 0 |   init_simd(); | 
| 894 |  |  | 
| 895 |  |   /* The code is optimised for these values only */ | 
| 896 | 0 |   if (DCTSIZE != 8) | 
| 897 | 0 |     return 0; | 
| 898 | 0 |   if (sizeof(JCOEF) != 2) | 
| 899 | 0 |     return 0; | 
| 900 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 901 | 0 |     return 0; | 
| 902 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 903 | 0 |     return 0; | 
| 904 | 0 |   if (sizeof(ISLOW_MULT_TYPE) != 2) | 
| 905 | 0 |     return 0; | 
| 906 |  |  | 
| 907 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 
| 908 | 0 |     return 1; | 
| 909 |  |  | 
| 910 | 0 |   return 0; | 
| 911 | 0 | } | 
| 912 |  |  | 
| 913 |  | GLOBAL(void) | 
| 914 |  | jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 915 |  |                JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 916 |  |                JDIMENSION output_col) | 
| 917 | 0 | { | 
| 918 | 0 |   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 
| 919 | 0 | } | 
| 920 |  |  | 
| 921 |  | GLOBAL(void) | 
| 922 |  | jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 923 |  |                JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 924 |  |                JDIMENSION output_col) | 
| 925 | 0 | { | 
| 926 | 0 |   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 
| 927 | 0 | } | 
| 928 |  |  | 
| 929 |  | GLOBAL(int) | 
| 930 |  | jsimd_can_idct_islow(void) | 
| 931 | 0 | { | 
| 932 | 0 |   init_simd(); | 
| 933 |  |  | 
| 934 |  |   /* The code is optimised for these values only */ | 
| 935 | 0 |   if (DCTSIZE != 8) | 
| 936 | 0 |     return 0; | 
| 937 | 0 |   if (sizeof(JCOEF) != 2) | 
| 938 | 0 |     return 0; | 
| 939 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 940 | 0 |     return 0; | 
| 941 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 942 | 0 |     return 0; | 
| 943 | 0 |   if (sizeof(ISLOW_MULT_TYPE) != 2) | 
| 944 | 0 |     return 0; | 
| 945 |  |  | 
| 946 | 0 |   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2)) | 
| 947 | 0 |     return 1; | 
| 948 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) | 
| 949 | 0 |     return 1; | 
| 950 |  |  | 
| 951 | 0 |   return 0; | 
| 952 | 0 | } | 
| 953 |  |  | 
| 954 |  | GLOBAL(int) | 
| 955 |  | jsimd_can_idct_ifast(void) | 
| 956 | 0 | { | 
| 957 | 0 |   init_simd(); | 
| 958 |  |  | 
| 959 |  |   /* The code is optimised for these values only */ | 
| 960 | 0 |   if (DCTSIZE != 8) | 
| 961 | 0 |     return 0; | 
| 962 | 0 |   if (sizeof(JCOEF) != 2) | 
| 963 | 0 |     return 0; | 
| 964 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 965 | 0 |     return 0; | 
| 966 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 967 | 0 |     return 0; | 
| 968 | 0 |   if (sizeof(IFAST_MULT_TYPE) != 2) | 
| 969 | 0 |     return 0; | 
| 970 | 0 |   if (IFAST_SCALE_BITS != 2) | 
| 971 | 0 |     return 0; | 
| 972 |  |  | 
| 973 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) | 
| 974 | 0 |     return 1; | 
| 975 |  |  | 
| 976 | 0 |   return 0; | 
| 977 | 0 | } | 
| 978 |  |  | 
| 979 |  | GLOBAL(int) | 
| 980 |  | jsimd_can_idct_float(void) | 
| 981 | 0 | { | 
| 982 | 0 |   init_simd(); | 
| 983 |  | 
 | 
| 984 | 0 |   if (DCTSIZE != 8) | 
| 985 | 0 |     return 0; | 
| 986 | 0 |   if (sizeof(JCOEF) != 2) | 
| 987 | 0 |     return 0; | 
| 988 | 0 |   if (BITS_IN_JSAMPLE != 8) | 
| 989 | 0 |     return 0; | 
| 990 | 0 |   if (sizeof(JDIMENSION) != 4) | 
| 991 | 0 |     return 0; | 
| 992 | 0 |   if (sizeof(FAST_FLOAT) != 4) | 
| 993 | 0 |     return 0; | 
| 994 | 0 |   if (sizeof(FLOAT_MULT_TYPE) != 4) | 
| 995 | 0 |     return 0; | 
| 996 |  |  | 
| 997 | 0 |   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) | 
| 998 | 0 |     return 1; | 
| 999 |  |  | 
| 1000 | 0 |   return 0; | 
| 1001 | 0 | } | 
| 1002 |  |  | 
| 1003 |  | GLOBAL(void) | 
| 1004 |  | jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 1005 |  |                  JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 1006 |  |                  JDIMENSION output_col) | 
| 1007 | 0 | { | 
| 1008 | 0 |   if (simd_support == ~0U) | 
| 1009 | 0 |     init_simd(); | 
| 1010 |  | 
 | 
| 1011 | 0 |   if (simd_support & JSIMD_AVX2) | 
| 1012 | 0 |     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, | 
| 1013 | 0 |                           output_col); | 
| 1014 | 0 |   else | 
| 1015 | 0 |     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, | 
| 1016 | 0 |                           output_col); | 
| 1017 | 0 | } | 
| 1018 |  |  | 
| 1019 |  | GLOBAL(void) | 
| 1020 |  | jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 1021 |  |                  JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 1022 |  |                  JDIMENSION output_col) | 
| 1023 | 0 | { | 
| 1024 | 0 |   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, | 
| 1025 | 0 |                         output_col); | 
| 1026 | 0 | } | 
| 1027 |  |  | 
| 1028 |  | GLOBAL(void) | 
| 1029 |  | jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 1030 |  |                  JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 1031 |  |                  JDIMENSION output_col) | 
| 1032 | 0 | { | 
| 1033 | 0 |   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, | 
| 1034 | 0 |                         output_col); | 
| 1035 | 0 | } | 
| 1036 |  |  | 
| 1037 |  | GLOBAL(int) | 
| 1038 |  | jsimd_can_huff_encode_one_block(void) | 
| 1039 | 14.5k | { | 
| 1040 | 14.5k |   init_simd(); | 
| 1041 |  |  | 
| 1042 | 14.5k |   if (DCTSIZE != 8) | 
| 1043 | 0 |     return 0; | 
| 1044 | 14.5k |   if (sizeof(JCOEF) != 2) | 
| 1045 | 0 |     return 0; | 
| 1046 |  |  | 
| 1047 | 14.5k |   if ((simd_support & JSIMD_SSE2) && simd_huffman && | 
| 1048 | 14.5k |       IS_ALIGNED_SSE(jconst_huff_encode_one_block)) | 
| 1049 | 14.5k |     return 1; | 
| 1050 |  |  | 
| 1051 | 0 |   return 0; | 
| 1052 | 14.5k | } | 
| 1053 |  |  | 
| 1054 |  | GLOBAL(JOCTET *) | 
| 1055 |  | jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, | 
| 1056 |  |                             int last_dc_val, c_derived_tbl *dctbl, | 
| 1057 |  |                             c_derived_tbl *actbl) | 
| 1058 | 10.0M | { | 
| 1059 | 10.0M |   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, | 
| 1060 | 10.0M |                                           dctbl, actbl); | 
| 1061 | 10.0M | } | 
| 1062 |  |  | 
| 1063 |  | GLOBAL(int) | 
| 1064 |  | jsimd_can_encode_mcu_AC_first_prepare(void) | 
| 1065 | 18.8k | { | 
| 1066 | 18.8k |   init_simd(); | 
| 1067 |  |  | 
| 1068 | 18.8k |   if (DCTSIZE != 8) | 
| 1069 | 0 |     return 0; | 
| 1070 | 18.8k |   if (sizeof(JCOEF) != 2) | 
| 1071 | 0 |     return 0; | 
| 1072 | 18.8k |   if (simd_support & JSIMD_SSE2) | 
| 1073 | 18.8k |     return 1; | 
| 1074 |  |  | 
| 1075 | 0 |   return 0; | 
| 1076 | 18.8k | } | 
| 1077 |  |  | 
| 1078 |  | GLOBAL(void) | 
| 1079 |  | jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, | 
| 1080 |  |                                   const int *jpeg_natural_order_start, int Sl, | 
| 1081 |  |                                   int Al, UJCOEF *values, size_t *zerobits) | 
| 1082 | 8.85M | { | 
| 1083 | 8.85M |   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, | 
| 1084 | 8.85M |                                          Sl, Al, values, zerobits); | 
| 1085 | 8.85M | } | 
| 1086 |  |  | 
| 1087 |  | GLOBAL(int) | 
| 1088 |  | jsimd_can_encode_mcu_AC_refine_prepare(void) | 
| 1089 | 15.0k | { | 
| 1090 | 15.0k |   init_simd(); | 
| 1091 |  |  | 
| 1092 | 15.0k |   if (DCTSIZE != 8) | 
| 1093 | 0 |     return 0; | 
| 1094 | 15.0k |   if (sizeof(JCOEF) != 2) | 
| 1095 | 0 |     return 0; | 
| 1096 | 15.0k |   if (simd_support & JSIMD_SSE2) | 
| 1097 | 15.0k |     return 1; | 
| 1098 |  |  | 
| 1099 | 0 |   return 0; | 
| 1100 | 15.0k | } | 
| 1101 |  |  | 
| 1102 |  | GLOBAL(int) | 
| 1103 |  | jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, | 
| 1104 |  |                                    const int *jpeg_natural_order_start, int Sl, | 
| 1105 |  |                                    int Al, UJCOEF *absvalues, size_t *bits) | 
| 1106 | 8.85M | { | 
| 1107 | 8.85M |   return jsimd_encode_mcu_AC_refine_prepare_sse2(block, | 
| 1108 | 8.85M |                                                  jpeg_natural_order_start, | 
| 1109 | 8.85M |                                                  Sl, Al, absvalues, bits); | 
| 1110 | 8.85M | } |