/src/libpng/intel/intel_init.c
Line | Count | Source |
1 | | /* intel_init.c - SSE2 optimized filter functions |
2 | | * |
3 | | * Copyright (c) 2018 Cosmin Truta |
4 | | * Copyright (c) 2016-2017 Glenn Randers-Pehrson |
5 | | * Written by Mike Klein and Matt Sarett, Google, Inc. |
6 | | * Derived from arm/arm_init.c |
7 | | * |
8 | | * This code is released under the libpng license. |
9 | | * For conditions of distribution and use, see the disclaimer |
10 | | * and license in png.h |
11 | | */ |
12 | | #define png_target_impl "intel-sse" |
13 | | |
14 | | #include "filter_sse2_intrinsics.c" |
15 | | |
16 | | static void |
17 | | png_init_filter_functions_sse2(png_struct *pp, unsigned int bpp) |
18 | 43.8k | { |
19 | | /* Install the SSE2 Sub, Avg and Paeth row filters into pp->read_filter for |
20 | | * bpp == 3 or bpp == 4; any other bpp leaves pp->read_filter untouched. |
21 | | * The SSE2 filters operate on one pixel at a time, so they generally speed |
22 | | * up 3bpp images about 3x and 4bpp images about 4x. The technique can scale |
23 | | * up to 6 and 8 bpp and down to 2 bpp, but is unlikely to help 1bpp images. |
24 | | * Most of these can be implemented using only MMX and 64-bit registers, |
25 | | * but they end up a bit slower than using the equally-ubiquitous SSE2. |
26 | | */ |
27 | 43.8k | png_debug(1, "in png_init_filter_functions_sse2"); |
28 | 43.8k | if (bpp == 3) |
29 | 16.6k | { |
30 | 16.6k | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; |
31 | 16.6k | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; |
32 | 16.6k | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
33 | 16.6k | png_read_filter_row_paeth3_sse2; |
34 | 16.6k | } |
35 | 27.1k | else if (bpp == 4) |
36 | 7.07k | { |
37 | 7.07k | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; |
38 | 7.07k | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; |
39 | 7.07k | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
40 | 7.07k | png_read_filter_row_paeth4_sse2; |
41 | 7.07k | } |
42 | | |
43 | | /* No need to optimize PNG_FILTER_VALUE_UP: its slot is not assigned |
44 | | * here, since the compiler should autovectorize it. |
45 | | */ |
46 | 43.8k | } |
47 | | |
48 | 43.8k | #define png_target_init_filter_functions_impl png_init_filter_functions_sse2 |