/src/ffmpeg/libavcodec/x86/diracdsp_init.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2010 David Conrad |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/x86/cpu.h" |
22 | | #include "libavcodec/diracdsp.h" |
23 | | #include "fpel.h" |
24 | | |
/*
 * Prototypes for the x86 SIMD routines used by this file. Only the
 * declarations live here; the _sse2 / _sse4 suffix names the instruction
 * set each routine is gated on in ff_diracdsp_init_x86().
 *
 * Top-level const on by-value parameters is omitted: it has no effect on
 * the function type and only matters inside a definition.
 */

/* NOTE(review): the parameter names below are descriptive only and were
 * chosen to match the sibling prototypes -- confirm them against the
 * add_rect_clamped member of DiracDSPContext. */
void ff_add_rect_clamped_sse2(uint8_t *dst, const uint16_t *src, int stride,
                              const int16_t *idwt, int idwt_stride,
                              int width, int height);

/* One routine per add_dirac_obmc[] slot (8, 16 and 32 in the names). */
void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/* ff_put_rect_clamped_sse2 is declared but not referenced anywhere in the
 * visible part of this file. */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                int qf, int qs, int tot_v, int tot_h);
36 | | |
37 | | #if HAVE_X86ASM |
38 | | |
/*
 * HPEL_FILTER(MMSIZE, EXT) declares the two external row filters
 * ff_dirac_hpel_filter_v_EXT and ff_dirac_hpel_filter_h_EXT, and defines
 * the static driver dirac_hpel_filter_EXT() that calls them once per row
 * for `height` rows.
 *
 * For every row the driver
 *   1. calls the v filter with both dstv and src moved back by MMSIZE
 *      bytes, and with width + MMSIZE + 5 as its last argument;
 *   2. calls the h filter with (dsth, src, width);
 *   3. calls the h filter with (dstc, dstv, width), i.e. it reads the dstv
 *      row that step 1 was handed as its destination.
 * Afterwards all four pointers advance by `stride`.
 *
 * Comments inside the macro body must stay in block-comment form because
 * of the line continuations.
 */
#define HPEL_FILTER(MMSIZE, EXT) \
void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \
 \
static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, \
                                      uint8_t *dstc, const uint8_t *src, \
                                      int stride, int width, int height) \
{ \
    for (int rows_left = height; rows_left != 0; rows_left--) { \
        ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE, \
                                       stride, width + MMSIZE + 5); \
        ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \
        ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \
 \
        src  += stride; \
        dsth += stride; \
        dstv += stride; \
        dstc += stride; \
    } \
}
58 | | |
/*
 * DIRAC_PIXOP(OPNAME, EXT) defines two static wrappers,
 * OPNAME_dirac_pixels16_EXT() and OPNAME_dirac_pixels32_EXT().
 *
 * Both pick their implementation from the row count h:
 *   - h is a multiple of 4: forward to ff_OPNAME_pixels16_EXT using only
 *     src[0]; the 32-wide wrapper calls it twice, the second time 16 bytes
 *     further into both dst and src[0];
 *   - otherwise: forward to the matching ff_OPNAME_dirac_pixels{16,32}_c
 *     routine, handing over the whole src array.
 * NOTE(review): presumably the EXT routine only supports heights that are
 * multiples of 4 -- confirm against its implementation.
 */
#define DIRAC_PIXOP(OPNAME, EXT) \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, \
                                              const uint8_t *src[5], \
                                              int stride, int h) \
{ \
    if ((h & 3) == 0) { \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h); \
        return; \
    } \
    ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h); \
} \
 \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, \
                                              const uint8_t *src[5], \
                                              int stride, int h) \
{ \
    if ((h & 3) == 0) { \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h); \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h); \
        return; \
    } \
    ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h); \
}
78 | | |
79 | | DIRAC_PIXOP(put, sse2) |
80 | | DIRAC_PIXOP(avg, sse2) |
81 | | |
82 | | HPEL_FILTER(16, sse2) |
83 | | |
84 | | #endif // HAVE_X86ASM |
85 | | |
86 | | void ff_diracdsp_init_x86(DiracDSPContext* c) |
87 | 8.95k | { |
88 | 8.95k | #if HAVE_X86ASM |
89 | 8.95k | int mm_flags = av_get_cpu_flags(); |
90 | | |
91 | 8.95k | if (EXTERNAL_SSE2(mm_flags)) { |
92 | 7.24k | c->dirac_hpel_filter = dirac_hpel_filter_sse2; |
93 | 7.24k | c->add_rect_clamped = ff_add_rect_clamped_sse2; |
94 | 7.24k | c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; |
95 | | |
96 | 7.24k | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_sse2; |
97 | 7.24k | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; |
98 | 7.24k | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; |
99 | | |
100 | 7.24k | c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2; |
101 | 7.24k | c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2; |
102 | 7.24k | c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2; |
103 | 7.24k | c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2; |
104 | 7.24k | } |
105 | | |
106 | 8.95k | if (EXTERNAL_SSE4(mm_flags)) { |
107 | 7.24k | c->dequant_subband[1] = ff_dequant_subband_32_sse4; |
108 | 7.24k | c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; |
109 | 7.24k | } |
110 | 8.95k | #endif // HAVE_X86ASM |
111 | 8.95k | } |