/src/ffmpeg/libavcodec/x86/diracdsp_init.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2010 David Conrad |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/x86/cpu.h" |
22 | | #include "libavcodec/diracdsp.h" |
23 | | #include "fpel.h" |
24 | | /* Prototypes only: no bodies for the ff_* routines below appear in this file. ff_diracdsp_init_x86() further down stores all of them except ff_put_rect_clamped_sse2 into DiracDSPContext. */ |
25 | | void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); |
26 | | /* Stored in add_dirac_obmc[0] when EXTERNAL_MMX() holds. */ |
27 | | void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); |
28 | | /* Stored in add_dirac_obmc[1] and add_dirac_obmc[2] when EXTERNAL_SSE2() holds. */ |
29 | | void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); |
30 | | void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); |
31 | | /* ff_put_rect_clamped_sse2 is not referenced anywhere else in this listing. The two signed variants fill put_signed_rect_clamped[0] and [1]; the _sse2 one takes const int16_t *src and is stored through a (void *) cast, the _10_sse4 one takes const uint8_t *src and is stored without a cast. */ |
32 | | void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); |
33 | | void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); |
34 | | void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); |
35 | | /* Stored in dequant_subband[1] when EXTERNAL_SSE4() holds. */ |
36 | | void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); |
37 | | |
38 | | #if HAVE_X86ASM |
39 | | /* HPEL_FILTER(MMSIZE, EXT): declares the external ff_dirac_hpel_filter_v_EXT / ff_dirac_hpel_filter_h_EXT routines and defines the static wrapper dirac_hpel_filter_EXT, which runs them once per row for `height` rows and fills three outputs: dsth (the _h_ routine applied to src), dstv (the _v_ routine applied to src) and dstc (the _h_ routine applied to dstv). NOTE(review): presumably _v_/_h_ are the vertical/horizontal half-pel filters and MMSIZE is the SIMD register width in bytes - confirm against the asm. */ |
40 | | #define HPEL_FILTER(MMSIZE, EXT) \ |
41 | | void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \ |
42 | | void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \ |
43 | | \ |
44 | | static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ |
45 | | const uint8_t *src, int stride, int width, int height) \ |
46 | 6.52k | { \ |
47 | 1.91M | while( height-- ) /* one pass of the three calls below per row */ \ |
48 | 1.90M | { \ |
49 | 1.90M | ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); /* starts MMSIZE bytes before dstv/src and passes width+MMSIZE+5; NOTE(review): presumably so the _h_ call on dstv below has a filtered margin on both sides - confirm */ \ |
50 | 1.90M | ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); /* dsth <- _h_ routine over the source row */ \ |
51 | 1.90M | ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); /* dstc <- _h_ routine over the dstv row written two calls above */ \ |
52 | 1.90M | \ |
53 | 1.90M | dsth += stride; /* all four pointers advance by the same stride */ \ |
54 | 1.90M | dstv += stride; \ |
55 | 1.90M | dstc += stride; \ |
56 | 1.90M | src += stride; \ |
57 | 1.90M | } \ |
58 | 6.52k | } |
59 | | /* PIXFUNC(PFX, IDX, EXT): expands to two assignments that store PFX_dirac_pixels16_EXT and PFX_dirac_pixels32_EXT into c->PFX_dirac_pixels_tab[1][IDX] and [2][IDX]; the [0] (pixels8) assignment is commented out. The expansion has no trailing semicolon, so the invocation must supply one, and a variable named c must be in scope. The macro is not invoked anywhere in this listing; ff_diracdsp_init_x86() writes the table entries directly. */ |
60 | | #define PIXFUNC(PFX, IDX, EXT) \ |
61 | | /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = PFX ## _dirac_pixels8_ ## EXT;*/ \ |
62 | | c->PFX ## _dirac_pixels_tab[1][IDX] = PFX ## _dirac_pixels16_ ## EXT; \ |
63 | | c->PFX ## _dirac_pixels_tab[2][IDX] = PFX ## _dirac_pixels32_ ## EXT |
64 | | /* DIRAC_PIXOP(OPNAME, EXT): defines the static wrappers OPNAME_dirac_pixels16_EXT and OPNAME_dirac_pixels32_EXT. Each one forwards to the C routine ff_OPNAME_dirac_pixels{16,32}_c (with the whole src[] array) when h is not a multiple of 4, and otherwise to ff_OPNAME_pixels16_EXT using only src[0]; the 32 variant calls it twice, the second time 16 bytes further into dst and src[0]. NOTE(review): presumably the EXT routine handles four rows per loop iteration and is declared in fpel.h - confirm. */ |
65 | | #define DIRAC_PIXOP(OPNAME, EXT)\ |
66 | | static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], \ |
67 | 1.49M | int stride, int h) \ |
68 | 1.49M | {\ |
69 | 1.49M | if (h&3) /* h not a multiple of 4: use the C routine */\ |
70 | 1.49M | ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);\ |
71 | 1.49M | else\ |
72 | 1.49M | ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ |
73 | 1.49M | }\ diracdsp_init.c:put_dirac_pixels16_sse2 Line | Count | Source | 67 | 1.13M | int stride, int h) \ | 68 | 1.13M | {\ | 69 | 1.13M | if (h&3)\ | 70 | 1.13M | ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);\ | 71 | 1.13M | else\ | 72 | 1.13M | ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ | 73 | 1.13M | }\ |
diracdsp_init.c:avg_dirac_pixels16_sse2 Line | Count | Source | 67 | 355k | int stride, int h) \ | 68 | 355k | {\ | 69 | 355k | if (h&3)\ | 70 | 355k | ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);\ | 71 | 355k | else\ | 72 | 355k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ | 73 | 355k | }\ |
|
74 | | static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], \ |
75 | 534k | int stride, int h) \ |
76 | 534k | {\ |
77 | 534k | if (h&3) { /* h not a multiple of 4: use the C routine */\ |
78 | 36.6k | ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);\ |
79 | 497k | } else {\ |
80 | 497k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h); /* first 16 bytes of each row */\ |
81 | 497k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h); /* next 16 bytes of each row */\ |
82 | 497k | }\ |
83 | 534k | } diracdsp_init.c:put_dirac_pixels32_sse2 Line | Count | Source | 75 | 489k | int stride, int h) \ | 76 | 489k | {\ | 77 | 489k | if (h&3) {\ | 78 | 31.6k | ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);\ | 79 | 458k | } else {\ | 80 | 458k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ | 81 | 458k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ | 82 | 458k | }\ | 83 | 489k | } |
diracdsp_init.c:avg_dirac_pixels32_sse2 Line | Count | Source | 75 | 44.8k | int stride, int h) \ | 76 | 44.8k | {\ | 77 | 44.8k | if (h&3) {\ | 78 | 4.95k | ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);\ | 79 | 39.8k | } else {\ | 80 | 39.8k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ | 81 | 39.8k | ff_ ## OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ | 82 | 39.8k | }\ | 83 | 44.8k | } |
|
84 | | /* Instantiations: the two DIRAC_PIXOP lines define put_dirac_pixels16_sse2, put_dirac_pixels32_sse2, avg_dirac_pixels16_sse2 and avg_dirac_pixels32_sse2. */ |
85 | | DIRAC_PIXOP(put, sse2) |
86 | | DIRAC_PIXOP(avg, sse2) |
87 | | /* Defines dirac_hpel_filter_sse2 with MMSIZE = 16 and declares ff_dirac_hpel_filter_v_sse2 / ff_dirac_hpel_filter_h_sse2. */ |
88 | | HPEL_FILTER(16, sse2) |
89 | | |
90 | | #endif // HAVE_X86ASM |
91 | | /* Overrides entries of *c with x86 routines, gated on the flags returned by av_get_cpu_flags(). The whole body is inside #if HAVE_X86ASM, so without it the function does nothing. Only the entries assigned below are written; every other field of *c keeps whatever value it had on entry. */ |
92 | | void ff_diracdsp_init_x86(DiracDSPContext* c) |
93 | 8.39k | { |
94 | 8.39k | #if HAVE_X86ASM |
95 | 8.39k | int mm_flags = av_get_cpu_flags(); |
96 | | /* MMX gates a single entry: add_dirac_obmc[0] gets the obmc8 routine. */ |
97 | 8.39k | if (EXTERNAL_MMX(mm_flags)) { |
98 | 6.79k | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; |
99 | 6.79k | } |
100 | | /* SSE2 gates the half-pel wrapper generated by HPEL_FILTER(16, sse2), the rect-clamp routines, the obmc16/obmc32 routines and the DIRAC_PIXOP wrappers. */ |
101 | 8.39k | if (EXTERNAL_SSE2(mm_flags)) { |
102 | 6.79k | c->dirac_hpel_filter = dirac_hpel_filter_sse2; |
103 | 6.79k | c->add_rect_clamped = ff_add_rect_clamped_sse2; |
104 | 6.79k | c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; |
105 | | /* The (void *) cast above hides that ff_put_signed_rect_clamped_sse2 is declared with const int16_t *src. NOTE(review): presumably the slot's pointer type uses const uint8_t *src, like the _10_sse4 routine stored without a cast below - confirm in diracdsp.h. */ |
106 | 6.79k | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; |
107 | 6.79k | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; |
108 | | /* Only column 0 of rows 1 and 2 of the put/avg tables is replaced; row 0 and every other column are left untouched here. */ |
109 | 6.79k | c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2; |
110 | 6.79k | c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2; |
111 | 6.79k | c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2; |
112 | 6.79k | c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2; |
113 | 6.79k | } |
114 | | /* SSE4 gates index 1 of dequant_subband and of put_signed_rect_clamped. */ |
115 | 8.39k | if (EXTERNAL_SSE4(mm_flags)) { |
116 | 6.79k | c->dequant_subband[1] = ff_dequant_subband_32_sse4; |
117 | 6.79k | c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; |
118 | 6.79k | } |
119 | 8.39k | #endif |
120 | 8.39k | } |