/src/ffmpeg/libavcodec/h26x/h2656_inter_template.c
Line | Count | Source |
1 | | /* |
2 | | * inter prediction template for HEVC/VVC |
3 | | * |
4 | | * Copyright (C) 2022 Nuo Mi |
5 | | * Copyright (C) 2024 Wu Jianhua |
6 | | * |
7 | | * This file is part of FFmpeg. |
8 | | * |
9 | | * FFmpeg is free software; you can redistribute it and/or |
10 | | * modify it under the terms of the GNU Lesser General Public |
11 | | * License as published by the Free Software Foundation; either |
12 | | * version 2.1 of the License, or (at your option) any later version. |
13 | | * |
14 | | * FFmpeg is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | | * Lesser General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public |
20 | | * License along with FFmpeg; if not, write to the Free Software |
21 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | | */ |
23 | | |
/*
 * Extra reference samples needed around a block by the interpolation filters.
 * The LUMA_* values match the 8-tap LUMA_FILTER footprint used in this file:
 * taps reach 3 samples before and 4 samples after the current position, so
 * LUMA_EXTRA_BEFORE is 3 and LUMA_EXTRA (before + after) is 7.
 * NOTE(review): the CHROMA_* values are presumably the equivalent counts for
 * the chroma filter (1 before, 3 in total); they are not used in the part of
 * the file visible here - confirm against the chroma functions.
 */
24 | 11.4M | #define CHROMA_EXTRA_BEFORE 1 |
25 | 70.3M | #define CHROMA_EXTRA 3 |
26 | 5.07M | #define LUMA_EXTRA_BEFORE 3 |
27 | 51.6M | #define LUMA_EXTRA 7 |
28 | | |
29 | | static void FUNC(put_pixels)(int16_t *dst, |
30 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
31 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
32 | 9.41M | { |
33 | 9.41M | const pixel *src = (const pixel *)_src; |
34 | 9.41M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
35 | | |
36 | 112M | for (int y = 0; y < height; y++) { |
37 | 1.92G | for (int x = 0; x < width; x++) |
38 | 1.81G | dst[x] = src[x] << (14 - BIT_DEPTH); |
39 | 102M | src += src_stride; |
40 | 102M | dst += MAX_PB_SIZE; |
41 | 102M | } |
42 | 9.41M | } Line | Count | Source | 32 | 496k | { | 33 | 496k | const pixel *src = (const pixel *)_src; | 34 | 496k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 11.2M | for (int y = 0; y < height; y++) { | 37 | 233M | for (int x = 0; x < width; x++) | 38 | 222M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 10.7M | src += src_stride; | 40 | 10.7M | dst += MAX_PB_SIZE; | 41 | 10.7M | } | 42 | 496k | } |
Line | Count | Source | 32 | 351k | { | 33 | 351k | const pixel *src = (const pixel *)_src; | 34 | 351k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 4.57M | for (int y = 0; y < height; y++) { | 37 | 84.1M | for (int x = 0; x < width; x++) | 38 | 79.9M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 4.22M | src += src_stride; | 40 | 4.22M | dst += MAX_PB_SIZE; | 41 | 4.22M | } | 42 | 351k | } |
Line | Count | Source | 32 | 2.31M | { | 33 | 2.31M | const pixel *src = (const pixel *)_src; | 34 | 2.31M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 25.4M | for (int y = 0; y < height; y++) { | 37 | 424M | for (int x = 0; x < width; x++) | 38 | 400M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 23.1M | src += src_stride; | 40 | 23.1M | dst += MAX_PB_SIZE; | 41 | 23.1M | } | 42 | 2.31M | } |
Line | Count | Source | 32 | 1.79M | { | 33 | 1.79M | const pixel *src = (const pixel *)_src; | 34 | 1.79M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 20.4M | for (int y = 0; y < height; y++) { | 37 | 336M | for (int x = 0; x < width; x++) | 38 | 317M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 18.6M | src += src_stride; | 40 | 18.6M | dst += MAX_PB_SIZE; | 41 | 18.6M | } | 42 | 1.79M | } |
Line | Count | Source | 32 | 2.31M | { | 33 | 2.31M | const pixel *src = (const pixel *)_src; | 34 | 2.31M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 25.4M | for (int y = 0; y < height; y++) { | 37 | 424M | for (int x = 0; x < width; x++) | 38 | 400M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 23.1M | src += src_stride; | 40 | 23.1M | dst += MAX_PB_SIZE; | 41 | 23.1M | } | 42 | 2.31M | } |
Line | Count | Source | 32 | 351k | { | 33 | 351k | const pixel *src = (const pixel *)_src; | 34 | 351k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 4.57M | for (int y = 0; y < height; y++) { | 37 | 84.1M | for (int x = 0; x < width; x++) | 38 | 79.9M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 4.22M | src += src_stride; | 40 | 4.22M | dst += MAX_PB_SIZE; | 41 | 4.22M | } | 42 | 351k | } |
Line | Count | Source | 32 | 1.79M | { | 33 | 1.79M | const pixel *src = (const pixel *)_src; | 34 | 1.79M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 35 | | | 36 | 20.4M | for (int y = 0; y < height; y++) { | 37 | 336M | for (int x = 0; x < width; x++) | 38 | 317M | dst[x] = src[x] << (14 - BIT_DEPTH); | 39 | 18.6M | src += src_stride; | 40 | 18.6M | dst += MAX_PB_SIZE; | 41 | 18.6M | } | 42 | 1.79M | } |
|
43 | | |
44 | | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
45 | | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, |
46 | | const int8_t *hf, const int8_t *vf, const int width) |
47 | 2.13M | { |
48 | 2.13M | const pixel *src = (const pixel *)_src; |
49 | 2.13M | pixel *dst = (pixel *)_dst; |
50 | 2.13M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
51 | 2.13M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
52 | | |
53 | 18.6M | for (int y = 0; y < height; y++) { |
54 | 16.4M | memcpy(dst, src, width * sizeof(pixel)); |
55 | 16.4M | src += src_stride; |
56 | 16.4M | dst += dst_stride; |
57 | 16.4M | } |
58 | 2.13M | } Line | Count | Source | 47 | 161k | { | 48 | 161k | const pixel *src = (const pixel *)_src; | 49 | 161k | pixel *dst = (pixel *)_dst; | 50 | 161k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 51 | 161k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 52 | | | 53 | 1.35M | for (int y = 0; y < height; y++) { | 54 | 1.18M | memcpy(dst, src, width * sizeof(pixel)); | 55 | 1.18M | src += src_stride; | 56 | 1.18M | dst += dst_stride; | 57 | 1.18M | } | 58 | 161k | } |
Line | Count | Source | 47 | 95.6k | { | 48 | 95.6k | const pixel *src = (const pixel *)_src; | 49 | 95.6k | pixel *dst = (pixel *)_dst; | 50 | 95.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 51 | 95.6k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 52 | | | 53 | 792k | for (int y = 0; y < height; y++) { | 54 | 696k | memcpy(dst, src, width * sizeof(pixel)); | 55 | 696k | src += src_stride; | 56 | 696k | dst += dst_stride; | 57 | 696k | } | 58 | 95.6k | } |
Line | Count | Source | 47 | 1.14M | { | 48 | 1.14M | const pixel *src = (const pixel *)_src; | 49 | 1.14M | pixel *dst = (pixel *)_dst; | 50 | 1.14M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 51 | 1.14M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 52 | | | 53 | 10.1M | for (int y = 0; y < height; y++) { | 54 | 8.98M | memcpy(dst, src, width * sizeof(pixel)); | 55 | 8.98M | src += src_stride; | 56 | 8.98M | dst += dst_stride; | 57 | 8.98M | } | 58 | 1.14M | } |
Line | Count | Source | 47 | 733k | { | 48 | 733k | const pixel *src = (const pixel *)_src; | 49 | 733k | pixel *dst = (pixel *)_dst; | 50 | 733k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 51 | 733k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 52 | | | 53 | 6.32M | for (int y = 0; y < height; y++) { | 54 | 5.59M | memcpy(dst, src, width * sizeof(pixel)); | 55 | 5.59M | src += src_stride; | 56 | 5.59M | dst += dst_stride; | 57 | 5.59M | } | 58 | 733k | } |
|
59 | | |
60 | | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
61 | | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, |
62 | | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, |
63 | | const int width) |
64 | 1.39M | { |
65 | 1.39M | const pixel *src = (const pixel *)_src; |
66 | 1.39M | pixel *dst = (pixel *)_dst; |
67 | 1.39M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
68 | 1.39M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
69 | 1.39M | const int shift = denom + 14 - BIT_DEPTH; |
70 | 1.39M | #if BIT_DEPTH < 14 |
71 | 1.39M | const int offset = 1 << (shift - 1); |
72 | | #else |
73 | | const int offset = 0; |
74 | | #endif |
75 | 1.39M | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
76 | | |
77 | 12.4M | for (int y = 0; y < height; y++) { |
78 | 164M | for (int x = 0; x < width; x++) { |
79 | 153M | const int v = (src[x] << (14 - BIT_DEPTH)); |
80 | 153M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); |
81 | 153M | } |
82 | 11.0M | src += src_stride; |
83 | 11.0M | dst += dst_stride; |
84 | 11.0M | } |
85 | 1.39M | } Line | Count | Source | 64 | 54.0k | { | 65 | 54.0k | const pixel *src = (const pixel *)_src; | 66 | 54.0k | pixel *dst = (pixel *)_dst; | 67 | 54.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 54.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 54.0k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 54.0k | #if BIT_DEPTH < 14 | 71 | 54.0k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 54.0k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 927k | for (int y = 0; y < height; y++) { | 78 | 18.0M | for (int x = 0; x < width; x++) { | 79 | 17.1M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 17.1M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 17.1M | } | 82 | 873k | src += src_stride; | 83 | 873k | dst += dst_stride; | 84 | 873k | } | 85 | 54.0k | } |
dsp.c:put_uni_w_pixels_10 Line | Count | Source | 64 | 48.2k | { | 65 | 48.2k | const pixel *src = (const pixel *)_src; | 66 | 48.2k | pixel *dst = (pixel *)_dst; | 67 | 48.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 48.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 48.2k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 48.2k | #if BIT_DEPTH < 14 | 71 | 48.2k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 48.2k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 683k | for (int y = 0; y < height; y++) { | 78 | 13.4M | for (int x = 0; x < width; x++) { | 79 | 12.8M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 12.8M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 12.8M | } | 82 | 634k | src += src_stride; | 83 | 634k | dst += dst_stride; | 84 | 634k | } | 85 | 48.2k | } |
dsp.c:put_uni_w_pixels_12 Line | Count | Source | 64 | 385k | { | 65 | 385k | const pixel *src = (const pixel *)_src; | 66 | 385k | pixel *dst = (pixel *)_dst; | 67 | 385k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 385k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 385k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 385k | #if BIT_DEPTH < 14 | 71 | 385k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 385k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 3.32M | for (int y = 0; y < height; y++) { | 78 | 42.2M | for (int x = 0; x < width; x++) { | 79 | 39.2M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 39.2M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 39.2M | } | 82 | 2.94M | src += src_stride; | 83 | 2.94M | dst += dst_stride; | 84 | 2.94M | } | 85 | 385k | } |
Line | Count | Source | 64 | 236k | { | 65 | 236k | const pixel *src = (const pixel *)_src; | 66 | 236k | pixel *dst = (pixel *)_dst; | 67 | 236k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 236k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 236k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 236k | #if BIT_DEPTH < 14 | 71 | 236k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 236k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 1.74M | for (int y = 0; y < height; y++) { | 78 | 17.6M | for (int x = 0; x < width; x++) { | 79 | 16.1M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 16.1M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 16.1M | } | 82 | 1.51M | src += src_stride; | 83 | 1.51M | dst += dst_stride; | 84 | 1.51M | } | 85 | 236k | } |
dsp.c:put_uni_w_pixels_12 Line | Count | Source | 64 | 385k | { | 65 | 385k | const pixel *src = (const pixel *)_src; | 66 | 385k | pixel *dst = (pixel *)_dst; | 67 | 385k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 385k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 385k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 385k | #if BIT_DEPTH < 14 | 71 | 385k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 385k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 3.32M | for (int y = 0; y < height; y++) { | 78 | 42.2M | for (int x = 0; x < width; x++) { | 79 | 39.2M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 39.2M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 39.2M | } | 82 | 2.94M | src += src_stride; | 83 | 2.94M | dst += dst_stride; | 84 | 2.94M | } | 85 | 385k | } |
dsp.c:put_uni_w_pixels_10 Line | Count | Source | 64 | 48.2k | { | 65 | 48.2k | const pixel *src = (const pixel *)_src; | 66 | 48.2k | pixel *dst = (pixel *)_dst; | 67 | 48.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 48.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 48.2k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 48.2k | #if BIT_DEPTH < 14 | 71 | 48.2k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 48.2k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 683k | for (int y = 0; y < height; y++) { | 78 | 13.4M | for (int x = 0; x < width; x++) { | 79 | 12.8M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 12.8M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 12.8M | } | 82 | 634k | src += src_stride; | 83 | 634k | dst += dst_stride; | 84 | 634k | } | 85 | 48.2k | } |
Line | Count | Source | 64 | 236k | { | 65 | 236k | const pixel *src = (const pixel *)_src; | 66 | 236k | pixel *dst = (pixel *)_dst; | 67 | 236k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 68 | 236k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 69 | 236k | const int shift = denom + 14 - BIT_DEPTH; | 70 | 236k | #if BIT_DEPTH < 14 | 71 | 236k | const int offset = 1 << (shift - 1); | 72 | | #else | 73 | | const int offset = 0; | 74 | | #endif | 75 | 236k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 76 | | | 77 | 1.74M | for (int y = 0; y < height; y++) { | 78 | 17.6M | for (int x = 0; x < width; x++) { | 79 | 16.1M | const int v = (src[x] << (14 - BIT_DEPTH)); | 80 | 16.1M | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | 81 | 16.1M | } | 82 | 1.51M | src += src_stride; | 83 | 1.51M | dst += dst_stride; | 84 | 1.51M | } | 85 | 236k | } |
|
86 | | |
/*
 * 8-tap filter sum centred on src[x].
 *
 * Taps are read at offsets -3 .. +4 (in units of stride) around index x.
 * The macro deliberately picks up two names from the scope it is expanded
 * in: `filter` (an array of at least 8 coefficients) and `x` (the current
 * sample index). Both macro parameters are parenthesized so that expression
 * arguments such as `p + 1` or `a + b` expand correctly.
 */
#define LUMA_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x               ] +                                     \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
96 | | |
97 | | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
98 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
99 | 350k | { |
100 | 350k | const pixel *src = (const pixel*)_src; |
101 | 350k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
102 | 350k | const int8_t *filter = hf; |
103 | | |
104 | 5.98M | for (int y = 0; y < height; y++) { |
105 | 149M | for (int x = 0; x < width; x++) |
106 | 144M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
107 | 5.63M | src += src_stride; |
108 | 5.63M | dst += MAX_PB_SIZE; |
109 | 5.63M | } |
110 | 350k | } Line | Count | Source | 99 | 25.8k | { | 100 | 25.8k | const pixel *src = (const pixel*)_src; | 101 | 25.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 25.8k | const int8_t *filter = hf; | 103 | | | 104 | 528k | for (int y = 0; y < height; y++) { | 105 | 13.8M | for (int x = 0; x < width; x++) | 106 | 13.3M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 502k | src += src_stride; | 108 | 502k | dst += MAX_PB_SIZE; | 109 | 502k | } | 110 | 25.8k | } |
Line | Count | Source | 99 | 22.0k | { | 100 | 22.0k | const pixel *src = (const pixel*)_src; | 101 | 22.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 22.0k | const int8_t *filter = hf; | 103 | | | 104 | 562k | for (int y = 0; y < height; y++) { | 105 | 19.5M | for (int x = 0; x < width; x++) | 106 | 18.9M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 540k | src += src_stride; | 108 | 540k | dst += MAX_PB_SIZE; | 109 | 540k | } | 110 | 22.0k | } |
Line | Count | Source | 99 | 74.9k | { | 100 | 74.9k | const pixel *src = (const pixel*)_src; | 101 | 74.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 74.9k | const int8_t *filter = hf; | 103 | | | 104 | 1.01M | for (int y = 0; y < height; y++) { | 105 | 21.4M | for (int x = 0; x < width; x++) | 106 | 20.4M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 939k | src += src_stride; | 108 | 939k | dst += MAX_PB_SIZE; | 109 | 939k | } | 110 | 74.9k | } |
Line | Count | Source | 99 | 65.3k | { | 100 | 65.3k | const pixel *src = (const pixel*)_src; | 101 | 65.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 65.3k | const int8_t *filter = hf; | 103 | | | 104 | 1.15M | for (int y = 0; y < height; y++) { | 105 | 27.1M | for (int x = 0; x < width; x++) | 106 | 26.0M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 1.08M | src += src_stride; | 108 | 1.08M | dst += MAX_PB_SIZE; | 109 | 1.08M | } | 110 | 65.3k | } |
Line | Count | Source | 99 | 74.9k | { | 100 | 74.9k | const pixel *src = (const pixel*)_src; | 101 | 74.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 74.9k | const int8_t *filter = hf; | 103 | | | 104 | 1.01M | for (int y = 0; y < height; y++) { | 105 | 21.4M | for (int x = 0; x < width; x++) | 106 | 20.4M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 939k | src += src_stride; | 108 | 939k | dst += MAX_PB_SIZE; | 109 | 939k | } | 110 | 74.9k | } |
Line | Count | Source | 99 | 22.0k | { | 100 | 22.0k | const pixel *src = (const pixel*)_src; | 101 | 22.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 22.0k | const int8_t *filter = hf; | 103 | | | 104 | 562k | for (int y = 0; y < height; y++) { | 105 | 19.5M | for (int x = 0; x < width; x++) | 106 | 18.9M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 540k | src += src_stride; | 108 | 540k | dst += MAX_PB_SIZE; | 109 | 540k | } | 110 | 22.0k | } |
Line | Count | Source | 99 | 65.3k | { | 100 | 65.3k | const pixel *src = (const pixel*)_src; | 101 | 65.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 102 | 65.3k | const int8_t *filter = hf; | 103 | | | 104 | 1.15M | for (int y = 0; y < height; y++) { | 105 | 27.1M | for (int x = 0; x < width; x++) | 106 | 26.0M | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 107 | 1.08M | src += src_stride; | 108 | 1.08M | dst += MAX_PB_SIZE; | 109 | 1.08M | } | 110 | 65.3k | } |
|
111 | | |
112 | | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
113 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
114 | 267k | { |
115 | 267k | const pixel *src = (pixel*)_src; |
116 | 267k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
117 | 267k | const int8_t *filter = vf; |
118 | | |
119 | 4.76M | for (int y = 0; y < height; y++) { |
120 | 126M | for (int x = 0; x < width; x++) |
121 | 122M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); |
122 | 4.50M | src += src_stride; |
123 | 4.50M | dst += MAX_PB_SIZE; |
124 | 4.50M | } |
125 | 267k | } Line | Count | Source | 114 | 29.6k | { | 115 | 29.6k | const pixel *src = (pixel*)_src; | 116 | 29.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 29.6k | const int8_t *filter = vf; | 118 | | | 119 | 738k | for (int y = 0; y < height; y++) { | 120 | 22.4M | for (int x = 0; x < width; x++) | 121 | 21.7M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 708k | src += src_stride; | 123 | 708k | dst += MAX_PB_SIZE; | 124 | 708k | } | 125 | 29.6k | } |
Line | Count | Source | 114 | 12.0k | { | 115 | 12.0k | const pixel *src = (pixel*)_src; | 116 | 12.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 12.0k | const int8_t *filter = vf; | 118 | | | 119 | 258k | for (int y = 0; y < height; y++) { | 120 | 8.90M | for (int x = 0; x < width; x++) | 121 | 8.65M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 246k | src += src_stride; | 123 | 246k | dst += MAX_PB_SIZE; | 124 | 246k | } | 125 | 12.0k | } |
Line | Count | Source | 114 | 55.0k | { | 115 | 55.0k | const pixel *src = (pixel*)_src; | 116 | 55.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 55.0k | const int8_t *filter = vf; | 118 | | | 119 | 810k | for (int y = 0; y < height; y++) { | 120 | 18.7M | for (int x = 0; x < width; x++) | 121 | 18.0M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 755k | src += src_stride; | 123 | 755k | dst += MAX_PB_SIZE; | 124 | 755k | } | 125 | 55.0k | } |
Line | Count | Source | 114 | 51.7k | { | 115 | 51.7k | const pixel *src = (pixel*)_src; | 116 | 51.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 51.7k | const int8_t *filter = vf; | 118 | | | 119 | 945k | for (int y = 0; y < height; y++) { | 120 | 24.4M | for (int x = 0; x < width; x++) | 121 | 23.5M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 893k | src += src_stride; | 123 | 893k | dst += MAX_PB_SIZE; | 124 | 893k | } | 125 | 51.7k | } |
Line | Count | Source | 114 | 55.0k | { | 115 | 55.0k | const pixel *src = (pixel*)_src; | 116 | 55.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 55.0k | const int8_t *filter = vf; | 118 | | | 119 | 810k | for (int y = 0; y < height; y++) { | 120 | 18.7M | for (int x = 0; x < width; x++) | 121 | 18.0M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 755k | src += src_stride; | 123 | 755k | dst += MAX_PB_SIZE; | 124 | 755k | } | 125 | 55.0k | } |
Line | Count | Source | 114 | 12.0k | { | 115 | 12.0k | const pixel *src = (pixel*)_src; | 116 | 12.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 12.0k | const int8_t *filter = vf; | 118 | | | 119 | 258k | for (int y = 0; y < height; y++) { | 120 | 8.90M | for (int x = 0; x < width; x++) | 121 | 8.65M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 246k | src += src_stride; | 123 | 246k | dst += MAX_PB_SIZE; | 124 | 246k | } | 125 | 12.0k | } |
Line | Count | Source | 114 | 51.7k | { | 115 | 51.7k | const pixel *src = (pixel*)_src; | 116 | 51.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 117 | 51.7k | const int8_t *filter = vf; | 118 | | | 119 | 945k | for (int y = 0; y < height; y++) { | 120 | 24.4M | for (int x = 0; x < width; x++) | 121 | 23.5M | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 122 | 893k | src += src_stride; | 123 | 893k | dst += MAX_PB_SIZE; | 124 | 893k | } | 125 | 51.7k | } |
|
126 | | |
127 | | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
128 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
129 | 735k | { |
130 | 735k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; |
131 | 735k | int16_t *tmp = tmp_array; |
132 | 735k | const pixel *src = (const pixel*)_src; |
133 | 735k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
134 | 735k | const int8_t *filter = hf; |
135 | | |
136 | 735k | src -= LUMA_EXTRA_BEFORE * src_stride; |
137 | 17.5M | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 | 386M | for (int x = 0; x < width; x++) |
139 | 369M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
140 | 16.7M | src += src_stride; |
141 | 16.7M | tmp += MAX_PB_SIZE; |
142 | 16.7M | } |
143 | | |
144 | 735k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; |
145 | 735k | filter = vf; |
146 | 12.3M | for (int y = 0; y < height; y++) { |
147 | 300M | for (int x = 0; x < width; x++) |
148 | 289M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; |
149 | 11.6M | tmp += MAX_PB_SIZE; |
150 | 11.6M | dst += MAX_PB_SIZE; |
151 | 11.6M | } |
152 | 735k | } Line | Count | Source | 129 | 67.6k | { | 130 | 67.6k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 67.6k | int16_t *tmp = tmp_array; | 132 | 67.6k | const pixel *src = (const pixel*)_src; | 133 | 67.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 67.6k | const int8_t *filter = hf; | 135 | | | 136 | 67.6k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 2.08M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 57.7M | for (int x = 0; x < width; x++) | 139 | 55.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 2.01M | src += src_stride; | 141 | 2.01M | tmp += MAX_PB_SIZE; | 142 | 2.01M | } | 143 | | | 144 | 67.6k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 67.6k | filter = vf; | 146 | 1.60M | for (int y = 0; y < height; y++) { | 147 | 46.0M | for (int x = 0; x < width; x++) | 148 | 44.4M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 1.53M | tmp += MAX_PB_SIZE; | 150 | 1.53M | dst += MAX_PB_SIZE; | 151 | 1.53M | } | 152 | 67.6k | } |
Line | Count | Source | 129 | 26.3k | { | 130 | 26.3k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 26.3k | int16_t *tmp = tmp_array; | 132 | 26.3k | const pixel *src = (const pixel*)_src; | 133 | 26.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 26.3k | const int8_t *filter = hf; | 135 | | | 136 | 26.3k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 775k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 23.4M | for (int x = 0; x < width; x++) | 139 | 22.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 748k | src += src_stride; | 141 | 748k | tmp += MAX_PB_SIZE; | 142 | 748k | } | 143 | | | 144 | 26.3k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 26.3k | filter = vf; | 146 | 590k | for (int y = 0; y < height; y++) { | 147 | 19.4M | for (int x = 0; x < width; x++) | 148 | 18.8M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 564k | tmp += MAX_PB_SIZE; | 150 | 564k | dst += MAX_PB_SIZE; | 151 | 564k | } | 152 | 26.3k | } |
Line | Count | Source | 129 | 152k | { | 130 | 152k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 152k | int16_t *tmp = tmp_array; | 132 | 152k | const pixel *src = (const pixel*)_src; | 133 | 152k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 152k | const int8_t *filter = hf; | 135 | | | 136 | 152k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 3.25M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 66.4M | for (int x = 0; x < width; x++) | 139 | 63.3M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 3.10M | src += src_stride; | 141 | 3.10M | tmp += MAX_PB_SIZE; | 142 | 3.10M | } | 143 | | | 144 | 152k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 152k | filter = vf; | 146 | 2.18M | for (int y = 0; y < height; y++) { | 147 | 51.2M | for (int x = 0; x < width; x++) | 148 | 49.2M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 2.03M | tmp += MAX_PB_SIZE; | 150 | 2.03M | dst += MAX_PB_SIZE; | 151 | 2.03M | } | 152 | 152k | } |
Line | Count | Source | 129 | 154k | { | 130 | 154k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 154k | int16_t *tmp = tmp_array; | 132 | 154k | const pixel *src = (const pixel*)_src; | 133 | 154k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 154k | const int8_t *filter = hf; | 135 | | | 136 | 154k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 3.68M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 74.2M | for (int x = 0; x < width; x++) | 139 | 70.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 3.53M | src += src_stride; | 141 | 3.53M | tmp += MAX_PB_SIZE; | 142 | 3.53M | } | 143 | | | 144 | 154k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 154k | filter = vf; | 146 | 2.60M | for (int y = 0; y < height; y++) { | 147 | 56.6M | for (int x = 0; x < width; x++) | 148 | 54.2M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 2.44M | tmp += MAX_PB_SIZE; | 150 | 2.44M | dst += MAX_PB_SIZE; | 151 | 2.44M | } | 152 | 154k | } |
Line | Count | Source | 129 | 152k | { | 130 | 152k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 152k | int16_t *tmp = tmp_array; | 132 | 152k | const pixel *src = (const pixel*)_src; | 133 | 152k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 152k | const int8_t *filter = hf; | 135 | | | 136 | 152k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 3.25M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 66.4M | for (int x = 0; x < width; x++) | 139 | 63.3M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 3.10M | src += src_stride; | 141 | 3.10M | tmp += MAX_PB_SIZE; | 142 | 3.10M | } | 143 | | | 144 | 152k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 152k | filter = vf; | 146 | 2.18M | for (int y = 0; y < height; y++) { | 147 | 51.2M | for (int x = 0; x < width; x++) | 148 | 49.2M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 2.03M | tmp += MAX_PB_SIZE; | 150 | 2.03M | dst += MAX_PB_SIZE; | 151 | 2.03M | } | 152 | 152k | } |
Line | Count | Source | 129 | 26.3k | { | 130 | 26.3k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 26.3k | int16_t *tmp = tmp_array; | 132 | 26.3k | const pixel *src = (const pixel*)_src; | 133 | 26.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 26.3k | const int8_t *filter = hf; | 135 | | | 136 | 26.3k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 775k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 23.4M | for (int x = 0; x < width; x++) | 139 | 22.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 748k | src += src_stride; | 141 | 748k | tmp += MAX_PB_SIZE; | 142 | 748k | } | 143 | | | 144 | 26.3k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 26.3k | filter = vf; | 146 | 590k | for (int y = 0; y < height; y++) { | 147 | 19.4M | for (int x = 0; x < width; x++) | 148 | 18.8M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 564k | tmp += MAX_PB_SIZE; | 150 | 564k | dst += MAX_PB_SIZE; | 151 | 564k | } | 152 | 26.3k | } |
Line | Count | Source | 129 | 154k | { | 130 | 154k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 131 | 154k | int16_t *tmp = tmp_array; | 132 | 154k | const pixel *src = (const pixel*)_src; | 133 | 154k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 134 | 154k | const int8_t *filter = hf; | 135 | | | 136 | 154k | src -= LUMA_EXTRA_BEFORE * src_stride; | 137 | 3.68M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 138 | 74.2M | for (int x = 0; x < width; x++) | 139 | 70.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 140 | 3.53M | src += src_stride; | 141 | 3.53M | tmp += MAX_PB_SIZE; | 142 | 3.53M | } | 143 | | | 144 | 154k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 145 | 154k | filter = vf; | 146 | 2.60M | for (int y = 0; y < height; y++) { | 147 | 56.6M | for (int x = 0; x < width; x++) | 148 | 54.2M | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 149 | 2.44M | tmp += MAX_PB_SIZE; | 150 | 2.44M | dst += MAX_PB_SIZE; | 151 | 2.44M | } | 152 | 154k | } |
|
153 | | |
154 | | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
155 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
156 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
157 | 559k | { |
158 | 559k | const pixel *src = (const pixel*)_src; |
159 | 559k | pixel *dst = (pixel *)_dst; |
160 | 559k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
161 | 559k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
162 | 559k | const int8_t *filter = hf; |
163 | 559k | const int shift = 14 - BIT_DEPTH; |
164 | 559k | #if BIT_DEPTH < 14 |
165 | 559k | const int offset = 1 << (shift - 1); |
166 | | #else |
167 | | const int offset = 0; |
168 | | #endif |
169 | | |
170 | 7.15M | for (int y = 0; y < height; y++) { |
171 | 137M | for (int x = 0; x < width; x++) { |
172 | 130M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
173 | 130M | dst[x] = av_clip_pixel((val + offset) >> shift); |
174 | 130M | } |
175 | 6.59M | src += src_stride; |
176 | 6.59M | dst += dst_stride; |
177 | 6.59M | } |
178 | 559k | } Line | Count | Source | 157 | 13.7k | { | 158 | 13.7k | const pixel *src = (const pixel*)_src; | 159 | 13.7k | pixel *dst = (pixel *)_dst; | 160 | 13.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 13.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 13.7k | const int8_t *filter = hf; | 163 | 13.7k | const int shift = 14 - BIT_DEPTH; | 164 | 13.7k | #if BIT_DEPTH < 14 | 165 | 13.7k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 198k | for (int y = 0; y < height; y++) { | 171 | 4.29M | for (int x = 0; x < width; x++) { | 172 | 4.10M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 4.10M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 4.10M | } | 175 | 184k | src += src_stride; | 176 | 184k | dst += dst_stride; | 177 | 184k | } | 178 | 13.7k | } |
Line | Count | Source | 157 | 17.4k | { | 158 | 17.4k | const pixel *src = (const pixel*)_src; | 159 | 17.4k | pixel *dst = (pixel *)_dst; | 160 | 17.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 17.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 17.4k | const int8_t *filter = hf; | 163 | 17.4k | const int shift = 14 - BIT_DEPTH; | 164 | 17.4k | #if BIT_DEPTH < 14 | 165 | 17.4k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 225k | for (int y = 0; y < height; y++) { | 171 | 3.66M | for (int x = 0; x < width; x++) { | 172 | 3.45M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 3.45M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 3.45M | } | 175 | 207k | src += src_stride; | 176 | 207k | dst += dst_stride; | 177 | 207k | } | 178 | 17.4k | } |
Line | Count | Source | 157 | 96.6k | { | 158 | 96.6k | const pixel *src = (const pixel*)_src; | 159 | 96.6k | pixel *dst = (pixel *)_dst; | 160 | 96.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 96.6k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 96.6k | const int8_t *filter = hf; | 163 | 96.6k | const int shift = 14 - BIT_DEPTH; | 164 | 96.6k | #if BIT_DEPTH < 14 | 165 | 96.6k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 1.17M | for (int y = 0; y < height; y++) { | 171 | 22.2M | for (int x = 0; x < width; x++) { | 172 | 21.2M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 21.2M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 21.2M | } | 175 | 1.07M | src += src_stride; | 176 | 1.07M | dst += dst_stride; | 177 | 1.07M | } | 178 | 96.6k | } |
Line | Count | Source | 157 | 158k | { | 158 | 158k | const pixel *src = (const pixel*)_src; | 159 | 158k | pixel *dst = (pixel *)_dst; | 160 | 158k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 158k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 158k | const int8_t *filter = hf; | 163 | 158k | const int shift = 14 - BIT_DEPTH; | 164 | 158k | #if BIT_DEPTH < 14 | 165 | 158k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 2.08M | for (int y = 0; y < height; y++) { | 171 | 40.4M | for (int x = 0; x < width; x++) { | 172 | 38.4M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 38.4M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 38.4M | } | 175 | 1.92M | src += src_stride; | 176 | 1.92M | dst += dst_stride; | 177 | 1.92M | } | 178 | 158k | } |
Line | Count | Source | 157 | 96.6k | { | 158 | 96.6k | const pixel *src = (const pixel*)_src; | 159 | 96.6k | pixel *dst = (pixel *)_dst; | 160 | 96.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 96.6k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 96.6k | const int8_t *filter = hf; | 163 | 96.6k | const int shift = 14 - BIT_DEPTH; | 164 | 96.6k | #if BIT_DEPTH < 14 | 165 | 96.6k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 1.17M | for (int y = 0; y < height; y++) { | 171 | 22.2M | for (int x = 0; x < width; x++) { | 172 | 21.2M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 21.2M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 21.2M | } | 175 | 1.07M | src += src_stride; | 176 | 1.07M | dst += dst_stride; | 177 | 1.07M | } | 178 | 96.6k | } |
Line | Count | Source | 157 | 17.4k | { | 158 | 17.4k | const pixel *src = (const pixel*)_src; | 159 | 17.4k | pixel *dst = (pixel *)_dst; | 160 | 17.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 17.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 17.4k | const int8_t *filter = hf; | 163 | 17.4k | const int shift = 14 - BIT_DEPTH; | 164 | 17.4k | #if BIT_DEPTH < 14 | 165 | 17.4k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 225k | for (int y = 0; y < height; y++) { | 171 | 3.66M | for (int x = 0; x < width; x++) { | 172 | 3.45M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 3.45M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 3.45M | } | 175 | 207k | src += src_stride; | 176 | 207k | dst += dst_stride; | 177 | 207k | } | 178 | 17.4k | } |
Line | Count | Source | 157 | 158k | { | 158 | 158k | const pixel *src = (const pixel*)_src; | 159 | 158k | pixel *dst = (pixel *)_dst; | 160 | 158k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 161 | 158k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 162 | 158k | const int8_t *filter = hf; | 163 | 158k | const int shift = 14 - BIT_DEPTH; | 164 | 158k | #if BIT_DEPTH < 14 | 165 | 158k | const int offset = 1 << (shift - 1); | 166 | | #else | 167 | | const int offset = 0; | 168 | | #endif | 169 | | | 170 | 2.08M | for (int y = 0; y < height; y++) { | 171 | 40.4M | for (int x = 0; x < width; x++) { | 172 | 38.4M | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 173 | 38.4M | dst[x] = av_clip_pixel((val + offset) >> shift); | 174 | 38.4M | } | 175 | 1.92M | src += src_stride; | 176 | 1.92M | dst += dst_stride; | 177 | 1.92M | } | 178 | 158k | } |
|
179 | | |
180 | | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
181 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
182 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
183 | 572k | { |
184 | | |
185 | 572k | const pixel *src = (const pixel*)_src; |
186 | 572k | pixel *dst = (pixel *)_dst; |
187 | 572k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
188 | 572k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
189 | 572k | const int8_t *filter = vf; |
190 | 572k | const int shift = 14 - BIT_DEPTH; |
191 | 572k | #if BIT_DEPTH < 14 |
192 | 572k | const int offset = 1 << (shift - 1); |
193 | | #else |
194 | | const int offset = 0; |
195 | | #endif |
196 | | |
197 | 6.33M | for (int y = 0; y < height; y++) { |
198 | 98.5M | for (int x = 0; x < width; x++) { |
199 | 92.7M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); |
200 | 92.7M | dst[x] = av_clip_pixel((val + offset) >> shift); |
201 | 92.7M | } |
202 | 5.76M | src += src_stride; |
203 | 5.76M | dst += dst_stride; |
204 | 5.76M | } |
205 | 572k | } Line | Count | Source | 183 | 13.7k | { | 184 | | | 185 | 13.7k | const pixel *src = (const pixel*)_src; | 186 | 13.7k | pixel *dst = (pixel *)_dst; | 187 | 13.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 13.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 13.7k | const int8_t *filter = vf; | 190 | 13.7k | const int shift = 14 - BIT_DEPTH; | 191 | 13.7k | #if BIT_DEPTH < 14 | 192 | 13.7k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 162k | for (int y = 0; y < height; y++) { | 198 | 2.98M | for (int x = 0; x < width; x++) { | 199 | 2.83M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 2.83M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 2.83M | } | 202 | 148k | src += src_stride; | 203 | 148k | dst += dst_stride; | 204 | 148k | } | 205 | 13.7k | } |
Line | Count | Source | 183 | 33.8k | { | 184 | | | 185 | 33.8k | const pixel *src = (const pixel*)_src; | 186 | 33.8k | pixel *dst = (pixel *)_dst; | 187 | 33.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 33.8k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 33.8k | const int8_t *filter = vf; | 190 | 33.8k | const int shift = 14 - BIT_DEPTH; | 191 | 33.8k | #if BIT_DEPTH < 14 | 192 | 33.8k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 376k | for (int y = 0; y < height; y++) { | 198 | 5.70M | for (int x = 0; x < width; x++) { | 199 | 5.35M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 5.35M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 5.35M | } | 202 | 343k | src += src_stride; | 203 | 343k | dst += dst_stride; | 204 | 343k | } | 205 | 33.8k | } |
Line | Count | Source | 183 | 119k | { | 184 | | | 185 | 119k | const pixel *src = (const pixel*)_src; | 186 | 119k | pixel *dst = (pixel *)_dst; | 187 | 119k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 119k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 119k | const int8_t *filter = vf; | 190 | 119k | const int shift = 14 - BIT_DEPTH; | 191 | 119k | #if BIT_DEPTH < 14 | 192 | 119k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 1.16M | for (int y = 0; y < height; y++) { | 198 | 14.0M | for (int x = 0; x < width; x++) { | 199 | 12.9M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 12.9M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 12.9M | } | 202 | 1.04M | src += src_stride; | 203 | 1.04M | dst += dst_stride; | 204 | 1.04M | } | 205 | 119k | } |
Line | Count | Source | 183 | 126k | { | 184 | | | 185 | 126k | const pixel *src = (const pixel*)_src; | 186 | 126k | pixel *dst = (pixel *)_dst; | 187 | 126k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 126k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 126k | const int8_t *filter = vf; | 190 | 126k | const int shift = 14 - BIT_DEPTH; | 191 | 126k | #if BIT_DEPTH < 14 | 192 | 126k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 1.54M | for (int y = 0; y < height; y++) { | 198 | 28.0M | for (int x = 0; x < width; x++) { | 199 | 26.6M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 26.6M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 26.6M | } | 202 | 1.41M | src += src_stride; | 203 | 1.41M | dst += dst_stride; | 204 | 1.41M | } | 205 | 126k | } |
Line | Count | Source | 183 | 119k | { | 184 | | | 185 | 119k | const pixel *src = (const pixel*)_src; | 186 | 119k | pixel *dst = (pixel *)_dst; | 187 | 119k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 119k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 119k | const int8_t *filter = vf; | 190 | 119k | const int shift = 14 - BIT_DEPTH; | 191 | 119k | #if BIT_DEPTH < 14 | 192 | 119k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 1.16M | for (int y = 0; y < height; y++) { | 198 | 14.0M | for (int x = 0; x < width; x++) { | 199 | 12.9M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 12.9M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 12.9M | } | 202 | 1.04M | src += src_stride; | 203 | 1.04M | dst += dst_stride; | 204 | 1.04M | } | 205 | 119k | } |
Line | Count | Source | 183 | 33.8k | { | 184 | | | 185 | 33.8k | const pixel *src = (const pixel*)_src; | 186 | 33.8k | pixel *dst = (pixel *)_dst; | 187 | 33.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 33.8k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 33.8k | const int8_t *filter = vf; | 190 | 33.8k | const int shift = 14 - BIT_DEPTH; | 191 | 33.8k | #if BIT_DEPTH < 14 | 192 | 33.8k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 376k | for (int y = 0; y < height; y++) { | 198 | 5.70M | for (int x = 0; x < width; x++) { | 199 | 5.35M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 5.35M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 5.35M | } | 202 | 343k | src += src_stride; | 203 | 343k | dst += dst_stride; | 204 | 343k | } | 205 | 33.8k | } |
Line | Count | Source | 183 | 126k | { | 184 | | | 185 | 126k | const pixel *src = (const pixel*)_src; | 186 | 126k | pixel *dst = (pixel *)_dst; | 187 | 126k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 188 | 126k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 189 | 126k | const int8_t *filter = vf; | 190 | 126k | const int shift = 14 - BIT_DEPTH; | 191 | 126k | #if BIT_DEPTH < 14 | 192 | 126k | const int offset = 1 << (shift - 1); | 193 | | #else | 194 | | const int offset = 0; | 195 | | #endif | 196 | | | 197 | 1.54M | for (int y = 0; y < height; y++) { | 198 | 28.0M | for (int x = 0; x < width; x++) { | 199 | 26.6M | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 200 | 26.6M | dst[x] = av_clip_pixel((val + offset) >> shift); | 201 | 26.6M | } | 202 | 1.41M | src += src_stride; | 203 | 1.41M | dst += dst_stride; | 204 | 1.41M | } | 205 | 126k | } |
|
206 | | |
207 | | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
208 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
209 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
210 | 1.36M | { |
211 | 1.36M | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; |
212 | 1.36M | int16_t *tmp = tmp_array; |
213 | 1.36M | const pixel *src = (const pixel*)_src; |
214 | 1.36M | pixel *dst = (pixel *)_dst; |
215 | 1.36M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
216 | 1.36M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
217 | 1.36M | const int8_t *filter = hf; |
218 | 1.36M | const int shift = 14 - BIT_DEPTH; |
219 | 1.36M | #if BIT_DEPTH < 14 |
220 | 1.36M | const int offset = 1 << (shift - 1); |
221 | | #else |
222 | | const int offset = 0; |
223 | | #endif |
224 | | |
225 | 1.36M | src -= LUMA_EXTRA_BEFORE * src_stride; |
226 | 25.1M | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 | 347M | for (int x = 0; x < width; x++) |
228 | 323M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
229 | 23.7M | src += src_stride; |
230 | 23.7M | tmp += MAX_PB_SIZE; |
231 | 23.7M | } |
232 | | |
233 | 1.36M | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; |
234 | 1.36M | filter = vf; |
235 | | |
236 | 15.5M | for (int y = 0; y < height; y++) { |
237 | 244M | for (int x = 0; x < width; x++) { |
238 | 230M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; |
239 | 230M | dst[x] = av_clip_pixel((val + offset) >> shift); |
240 | 230M | } |
241 | 14.1M | tmp += MAX_PB_SIZE; |
242 | 14.1M | dst += dst_stride; |
243 | 14.1M | } |
244 | | |
245 | 1.36M | } Line | Count | Source | 210 | 32.9k | { | 211 | 32.9k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 32.9k | int16_t *tmp = tmp_array; | 213 | 32.9k | const pixel *src = (const pixel*)_src; | 214 | 32.9k | pixel *dst = (pixel *)_dst; | 215 | 32.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 32.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 32.9k | const int8_t *filter = hf; | 218 | 32.9k | const int shift = 14 - BIT_DEPTH; | 219 | 32.9k | #if BIT_DEPTH < 14 | 220 | 32.9k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 32.9k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 659k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 11.1M | for (int x = 0; x < width; x++) | 228 | 10.5M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 626k | src += src_stride; | 230 | 626k | tmp += MAX_PB_SIZE; | 231 | 626k | } | 232 | | | 233 | 32.9k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 32.9k | filter = vf; | 235 | | | 236 | 429k | for (int y = 0; y < height; y++) { | 237 | 8.09M | for (int x = 0; x < width; x++) { | 238 | 7.69M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 7.69M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 7.69M | } | 241 | 396k | tmp += MAX_PB_SIZE; | 242 | 396k | dst += dst_stride; | 243 | 396k | } | 244 | | | 245 | 32.9k | } |
Line | Count | Source | 210 | 32.0k | { | 211 | 32.0k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 32.0k | int16_t *tmp = tmp_array; | 213 | 32.0k | const pixel *src = (const pixel*)_src; | 214 | 32.0k | pixel *dst = (pixel *)_dst; | 215 | 32.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 32.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 32.0k | const int8_t *filter = hf; | 218 | 32.0k | const int shift = 14 - BIT_DEPTH; | 219 | 32.0k | #if BIT_DEPTH < 14 | 220 | 32.0k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 32.0k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 584k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 7.03M | for (int x = 0; x < width; x++) | 228 | 6.48M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 552k | src += src_stride; | 230 | 552k | tmp += MAX_PB_SIZE; | 231 | 552k | } | 232 | | | 233 | 32.0k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 32.0k | filter = vf; | 235 | | | 236 | 360k | for (int y = 0; y < height; y++) { | 237 | 4.69M | for (int x = 0; x < width; x++) { | 238 | 4.37M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 4.37M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 4.37M | } | 241 | 328k | tmp += MAX_PB_SIZE; | 242 | 328k | dst += dst_stride; | 243 | 328k | } | 244 | | | 245 | 32.0k | } |
Line | Count | Source | 210 | 282k | { | 211 | 282k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 282k | int16_t *tmp = tmp_array; | 213 | 282k | const pixel *src = (const pixel*)_src; | 214 | 282k | pixel *dst = (pixel *)_dst; | 215 | 282k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 282k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 282k | const int8_t *filter = hf; | 218 | 282k | const int shift = 14 - BIT_DEPTH; | 219 | 282k | #if BIT_DEPTH < 14 | 220 | 282k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 282k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 4.88M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 59.3M | for (int x = 0; x < width; x++) | 228 | 54.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 4.60M | src += src_stride; | 230 | 4.60M | tmp += MAX_PB_SIZE; | 231 | 4.60M | } | 232 | | | 233 | 282k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 282k | filter = vf; | 235 | | | 236 | 2.91M | for (int y = 0; y < height; y++) { | 237 | 40.3M | for (int x = 0; x < width; x++) { | 238 | 37.7M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 37.7M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 37.7M | } | 241 | 2.62M | tmp += MAX_PB_SIZE; | 242 | 2.62M | dst += dst_stride; | 243 | 2.62M | } | 244 | | | 245 | 282k | } |
Line | Count | Source | 210 | 352k | { | 211 | 352k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 352k | int16_t *tmp = tmp_array; | 213 | 352k | const pixel *src = (const pixel*)_src; | 214 | 352k | pixel *dst = (pixel *)_dst; | 215 | 352k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 352k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 352k | const int8_t *filter = hf; | 218 | 352k | const int shift = 14 - BIT_DEPTH; | 219 | 352k | #if BIT_DEPTH < 14 | 220 | 352k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 352k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 6.75M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 101M | for (int x = 0; x < width; x++) | 228 | 95.2M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 6.40M | src += src_stride; | 230 | 6.40M | tmp += MAX_PB_SIZE; | 231 | 6.40M | } | 232 | | | 233 | 352k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 352k | filter = vf; | 235 | | | 236 | 4.28M | for (int y = 0; y < height; y++) { | 237 | 73.0M | for (int x = 0; x < width; x++) { | 238 | 69.0M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 69.0M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 69.0M | } | 241 | 3.93M | tmp += MAX_PB_SIZE; | 242 | 3.93M | dst += dst_stride; | 243 | 3.93M | } | 244 | | | 245 | 352k | } |
Line | Count | Source | 210 | 282k | { | 211 | 282k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 282k | int16_t *tmp = tmp_array; | 213 | 282k | const pixel *src = (const pixel*)_src; | 214 | 282k | pixel *dst = (pixel *)_dst; | 215 | 282k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 282k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 282k | const int8_t *filter = hf; | 218 | 282k | const int shift = 14 - BIT_DEPTH; | 219 | 282k | #if BIT_DEPTH < 14 | 220 | 282k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 282k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 4.88M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 59.3M | for (int x = 0; x < width; x++) | 228 | 54.7M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 4.60M | src += src_stride; | 230 | 4.60M | tmp += MAX_PB_SIZE; | 231 | 4.60M | } | 232 | | | 233 | 282k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 282k | filter = vf; | 235 | | | 236 | 2.91M | for (int y = 0; y < height; y++) { | 237 | 40.3M | for (int x = 0; x < width; x++) { | 238 | 37.7M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 37.7M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 37.7M | } | 241 | 2.62M | tmp += MAX_PB_SIZE; | 242 | 2.62M | dst += dst_stride; | 243 | 2.62M | } | 244 | | | 245 | 282k | } |
Line | Count | Source | 210 | 32.0k | { | 211 | 32.0k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 32.0k | int16_t *tmp = tmp_array; | 213 | 32.0k | const pixel *src = (const pixel*)_src; | 214 | 32.0k | pixel *dst = (pixel *)_dst; | 215 | 32.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 32.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 32.0k | const int8_t *filter = hf; | 218 | 32.0k | const int shift = 14 - BIT_DEPTH; | 219 | 32.0k | #if BIT_DEPTH < 14 | 220 | 32.0k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 32.0k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 584k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 7.03M | for (int x = 0; x < width; x++) | 228 | 6.48M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 552k | src += src_stride; | 230 | 552k | tmp += MAX_PB_SIZE; | 231 | 552k | } | 232 | | | 233 | 32.0k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 32.0k | filter = vf; | 235 | | | 236 | 360k | for (int y = 0; y < height; y++) { | 237 | 4.69M | for (int x = 0; x < width; x++) { | 238 | 4.37M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 4.37M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 4.37M | } | 241 | 328k | tmp += MAX_PB_SIZE; | 242 | 328k | dst += dst_stride; | 243 | 328k | } | 244 | | | 245 | 32.0k | } |
Line | Count | Source | 210 | 352k | { | 211 | 352k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 212 | 352k | int16_t *tmp = tmp_array; | 213 | 352k | const pixel *src = (const pixel*)_src; | 214 | 352k | pixel *dst = (pixel *)_dst; | 215 | 352k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 216 | 352k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 217 | 352k | const int8_t *filter = hf; | 218 | 352k | const int shift = 14 - BIT_DEPTH; | 219 | 352k | #if BIT_DEPTH < 14 | 220 | 352k | const int offset = 1 << (shift - 1); | 221 | | #else | 222 | | const int offset = 0; | 223 | | #endif | 224 | | | 225 | 352k | src -= LUMA_EXTRA_BEFORE * src_stride; | 226 | 6.75M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 227 | 101M | for (int x = 0; x < width; x++) | 228 | 95.2M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 229 | 6.40M | src += src_stride; | 230 | 6.40M | tmp += MAX_PB_SIZE; | 231 | 6.40M | } | 232 | | | 233 | 352k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 234 | 352k | filter = vf; | 235 | | | 236 | 4.28M | for (int y = 0; y < height; y++) { | 237 | 73.0M | for (int x = 0; x < width; x++) { | 238 | 69.0M | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 239 | 69.0M | dst[x] = av_clip_pixel((val + offset) >> shift); | 240 | 69.0M | } | 241 | 3.93M | tmp += MAX_PB_SIZE; | 242 | 3.93M | dst += dst_stride; | 243 | 3.93M | } | 244 | | | 245 | 352k | } |
|
246 | | |
247 | | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
248 | | const uint8_t *_src, const ptrdiff_t _src_stride, int height, |
249 | | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, |
250 | | const int width) |
251 | 147k | { |
252 | 147k | const pixel *src = (const pixel*)_src; |
253 | 147k | pixel *dst = (pixel *)_dst; |
254 | 147k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
255 | 147k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
256 | 147k | const int8_t *filter = hf; |
257 | 147k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
258 | 147k | const int shift = denom + 14 - BIT_DEPTH; |
259 | 147k | #if BIT_DEPTH < 14 |
260 | 147k | const int offset = 1 << (shift - 1); |
261 | | #else |
262 | | const int offset = 0; |
263 | | #endif |
264 | | |
265 | 1.98M | for (int y = 0; y < height; y++) { |
266 | 38.8M | for (int x = 0; x < width; x++) |
267 | 37.0M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
268 | 1.83M | src += src_stride; |
269 | 1.83M | dst += dst_stride; |
270 | 1.83M | } |
271 | 147k | } Line | Count | Source | 251 | 10.1k | { | 252 | 10.1k | const pixel *src = (const pixel*)_src; | 253 | 10.1k | pixel *dst = (pixel *)_dst; | 254 | 10.1k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 10.1k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 10.1k | const int8_t *filter = hf; | 257 | 10.1k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 10.1k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 10.1k | #if BIT_DEPTH < 14 | 260 | 10.1k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 204k | for (int y = 0; y < height; y++) { | 266 | 5.38M | for (int x = 0; x < width; x++) | 267 | 5.18M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 194k | src += src_stride; | 269 | 194k | dst += dst_stride; | 270 | 194k | } | 271 | 10.1k | } |
dsp.c:put_uni_luma_w_h_10 Line | Count | Source | 251 | 12.3k | { | 252 | 12.3k | const pixel *src = (const pixel*)_src; | 253 | 12.3k | pixel *dst = (pixel *)_dst; | 254 | 12.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 12.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 12.3k | const int8_t *filter = hf; | 257 | 12.3k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 12.3k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 12.3k | #if BIT_DEPTH < 14 | 260 | 12.3k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 202k | for (int y = 0; y < height; y++) { | 266 | 4.98M | for (int x = 0; x < width; x++) | 267 | 4.79M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 190k | src += src_stride; | 269 | 190k | dst += dst_stride; | 270 | 190k | } | 271 | 12.3k | } |
dsp.c:put_uni_luma_w_h_12 Line | Count | Source | 251 | 30.9k | { | 252 | 30.9k | const pixel *src = (const pixel*)_src; | 253 | 30.9k | pixel *dst = (pixel *)_dst; | 254 | 30.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 30.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 30.9k | const int8_t *filter = hf; | 257 | 30.9k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 30.9k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 30.9k | #if BIT_DEPTH < 14 | 260 | 30.9k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 394k | for (int y = 0; y < height; y++) { | 266 | 7.37M | for (int x = 0; x < width; x++) | 267 | 7.00M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 363k | src += src_stride; | 269 | 363k | dst += dst_stride; | 270 | 363k | } | 271 | 30.9k | } |
Line | Count | Source | 251 | 25.4k | { | 252 | 25.4k | const pixel *src = (const pixel*)_src; | 253 | 25.4k | pixel *dst = (pixel *)_dst; | 254 | 25.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 25.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 25.4k | const int8_t *filter = hf; | 257 | 25.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 25.4k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 25.4k | #if BIT_DEPTH < 14 | 260 | 25.4k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 289k | for (int y = 0; y < height; y++) { | 266 | 4.36M | for (int x = 0; x < width; x++) | 267 | 4.10M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 264k | src += src_stride; | 269 | 264k | dst += dst_stride; | 270 | 264k | } | 271 | 25.4k | } |
dsp.c:put_uni_luma_w_h_12 Line | Count | Source | 251 | 30.9k | { | 252 | 30.9k | const pixel *src = (const pixel*)_src; | 253 | 30.9k | pixel *dst = (pixel *)_dst; | 254 | 30.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 30.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 30.9k | const int8_t *filter = hf; | 257 | 30.9k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 30.9k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 30.9k | #if BIT_DEPTH < 14 | 260 | 30.9k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 394k | for (int y = 0; y < height; y++) { | 266 | 7.37M | for (int x = 0; x < width; x++) | 267 | 7.00M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 363k | src += src_stride; | 269 | 363k | dst += dst_stride; | 270 | 363k | } | 271 | 30.9k | } |
dsp.c:put_uni_luma_w_h_10 Line | Count | Source | 251 | 12.3k | { | 252 | 12.3k | const pixel *src = (const pixel*)_src; | 253 | 12.3k | pixel *dst = (pixel *)_dst; | 254 | 12.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 12.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 12.3k | const int8_t *filter = hf; | 257 | 12.3k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 12.3k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 12.3k | #if BIT_DEPTH < 14 | 260 | 12.3k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 202k | for (int y = 0; y < height; y++) { | 266 | 4.98M | for (int x = 0; x < width; x++) | 267 | 4.79M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 190k | src += src_stride; | 269 | 190k | dst += dst_stride; | 270 | 190k | } | 271 | 12.3k | } |
Line | Count | Source | 251 | 25.4k | { | 252 | 25.4k | const pixel *src = (const pixel*)_src; | 253 | 25.4k | pixel *dst = (pixel *)_dst; | 254 | 25.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 255 | 25.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 256 | 25.4k | const int8_t *filter = hf; | 257 | 25.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 258 | 25.4k | const int shift = denom + 14 - BIT_DEPTH; | 259 | 25.4k | #if BIT_DEPTH < 14 | 260 | 25.4k | const int offset = 1 << (shift - 1); | 261 | | #else | 262 | | const int offset = 0; | 263 | | #endif | 264 | | | 265 | 289k | for (int y = 0; y < height; y++) { | 266 | 4.36M | for (int x = 0; x < width; x++) | 267 | 4.10M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 268 | 264k | src += src_stride; | 269 | 264k | dst += dst_stride; | 270 | 264k | } | 271 | 25.4k | } |
|
272 | | |
273 | | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
274 | | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, |
275 | | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, |
276 | | const int width) |
277 | 150k | { |
278 | 150k | const pixel *src = (const pixel*)_src; |
279 | 150k | pixel *dst = (pixel *)_dst; |
280 | 150k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
281 | 150k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
282 | 150k | const int8_t *filter = vf; |
283 | 150k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
284 | 150k | const int shift = denom + 14 - BIT_DEPTH; |
285 | 150k | #if BIT_DEPTH < 14 |
286 | 150k | const int offset = 1 << (shift - 1); |
287 | | #else |
288 | | const int offset = 0; |
289 | | #endif |
290 | | |
291 | 2.04M | for (int y = 0; y < height; y++) { |
292 | 40.0M | for (int x = 0; x < width; x++) |
293 | 38.1M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
294 | 1.88M | src += src_stride; |
295 | 1.88M | dst += dst_stride; |
296 | 1.88M | } |
297 | 150k | } Line | Count | Source | 277 | 10.6k | { | 278 | 10.6k | const pixel *src = (const pixel*)_src; | 279 | 10.6k | pixel *dst = (pixel *)_dst; | 280 | 10.6k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 10.6k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 10.6k | const int8_t *filter = vf; | 283 | 10.6k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 10.6k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 10.6k | #if BIT_DEPTH < 14 | 286 | 10.6k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 179k | for (int y = 0; y < height; y++) { | 292 | 4.20M | for (int x = 0; x < width; x++) | 293 | 4.03M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 169k | src += src_stride; | 295 | 169k | dst += dst_stride; | 296 | 169k | } | 297 | 10.6k | } |
dsp.c:put_uni_luma_w_v_10 Line | Count | Source | 277 | 15.2k | { | 278 | 15.2k | const pixel *src = (const pixel*)_src; | 279 | 15.2k | pixel *dst = (pixel *)_dst; | 280 | 15.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 15.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 15.2k | const int8_t *filter = vf; | 283 | 15.2k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 15.2k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 15.2k | #if BIT_DEPTH < 14 | 286 | 15.2k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 230k | for (int y = 0; y < height; y++) { | 292 | 5.47M | for (int x = 0; x < width; x++) | 293 | 5.25M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 215k | src += src_stride; | 295 | 215k | dst += dst_stride; | 296 | 215k | } | 297 | 15.2k | } |
dsp.c:put_uni_luma_w_v_12 Line | Count | Source | 277 | 25.1k | { | 278 | 25.1k | const pixel *src = (const pixel*)_src; | 279 | 25.1k | pixel *dst = (pixel *)_dst; | 280 | 25.1k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 25.1k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 25.1k | const int8_t *filter = vf; | 283 | 25.1k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 25.1k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 25.1k | #if BIT_DEPTH < 14 | 286 | 25.1k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 347k | for (int y = 0; y < height; y++) { | 292 | 6.90M | for (int x = 0; x < width; x++) | 293 | 6.58M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 322k | src += src_stride; | 295 | 322k | dst += dst_stride; | 296 | 322k | } | 297 | 25.1k | } |
Line | Count | Source | 277 | 29.7k | { | 278 | 29.7k | const pixel *src = (const pixel*)_src; | 279 | 29.7k | pixel *dst = (pixel *)_dst; | 280 | 29.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 29.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 29.7k | const int8_t *filter = vf; | 283 | 29.7k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 29.7k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 29.7k | #if BIT_DEPTH < 14 | 286 | 29.7k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 352k | for (int y = 0; y < height; y++) { | 292 | 5.54M | for (int x = 0; x < width; x++) | 293 | 5.22M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 322k | src += src_stride; | 295 | 322k | dst += dst_stride; | 296 | 322k | } | 297 | 29.7k | } |
dsp.c:put_uni_luma_w_v_12 Line | Count | Source | 277 | 25.1k | { | 278 | 25.1k | const pixel *src = (const pixel*)_src; | 279 | 25.1k | pixel *dst = (pixel *)_dst; | 280 | 25.1k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 25.1k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 25.1k | const int8_t *filter = vf; | 283 | 25.1k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 25.1k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 25.1k | #if BIT_DEPTH < 14 | 286 | 25.1k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 347k | for (int y = 0; y < height; y++) { | 292 | 6.90M | for (int x = 0; x < width; x++) | 293 | 6.58M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 322k | src += src_stride; | 295 | 322k | dst += dst_stride; | 296 | 322k | } | 297 | 25.1k | } |
dsp.c:put_uni_luma_w_v_10 Line | Count | Source | 277 | 15.2k | { | 278 | 15.2k | const pixel *src = (const pixel*)_src; | 279 | 15.2k | pixel *dst = (pixel *)_dst; | 280 | 15.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 15.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 15.2k | const int8_t *filter = vf; | 283 | 15.2k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 15.2k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 15.2k | #if BIT_DEPTH < 14 | 286 | 15.2k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 230k | for (int y = 0; y < height; y++) { | 292 | 5.47M | for (int x = 0; x < width; x++) | 293 | 5.25M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 215k | src += src_stride; | 295 | 215k | dst += dst_stride; | 296 | 215k | } | 297 | 15.2k | } |
Line | Count | Source | 277 | 29.7k | { | 278 | 29.7k | const pixel *src = (const pixel*)_src; | 279 | 29.7k | pixel *dst = (pixel *)_dst; | 280 | 29.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 281 | 29.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 282 | 29.7k | const int8_t *filter = vf; | 283 | 29.7k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 284 | 29.7k | const int shift = denom + 14 - BIT_DEPTH; | 285 | 29.7k | #if BIT_DEPTH < 14 | 286 | 29.7k | const int offset = 1 << (shift - 1); | 287 | | #else | 288 | | const int offset = 0; | 289 | | #endif | 290 | | | 291 | 352k | for (int y = 0; y < height; y++) { | 292 | 5.54M | for (int x = 0; x < width; x++) | 293 | 5.22M | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 294 | 322k | src += src_stride; | 295 | 322k | dst += dst_stride; | 296 | 322k | } | 297 | 29.7k | } |
|
298 | | |
299 | | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
300 | | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, |
301 | | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) |
302 | 433k | { |
303 | 433k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; |
304 | 433k | int16_t *tmp = tmp_array; |
305 | 433k | const pixel *src = (const pixel*)_src; |
306 | 433k | pixel *dst = (pixel *)_dst; |
307 | 433k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
308 | 433k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
309 | 433k | const int8_t *filter = hf; |
310 | 433k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
311 | 433k | const int shift = denom + 14 - BIT_DEPTH; |
312 | 433k | #if BIT_DEPTH < 14 |
313 | 433k | const int offset = 1 << (shift - 1); |
314 | | #else |
315 | | const int offset = 0; |
316 | | #endif |
317 | | |
318 | 433k | src -= LUMA_EXTRA_BEFORE * src_stride; |
319 | 9.03M | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 | 152M | for (int x = 0; x < width; x++) |
321 | 143M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
322 | 8.60M | src += src_stride; |
323 | 8.60M | tmp += MAX_PB_SIZE; |
324 | 8.60M | } |
325 | | |
326 | 433k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; |
327 | 433k | filter = vf; |
328 | 6.00M | for (int y = 0; y < height; y++) { |
329 | 110M | for (int x = 0; x < width; x++) |
330 | 105M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
331 | 5.56M | tmp += MAX_PB_SIZE; |
332 | 5.56M | dst += dst_stride; |
333 | 5.56M | } |
334 | 433k | } dsp.c:put_uni_luma_w_hv_9 Line | Count | Source | 302 | 18.0k | { | 303 | 18.0k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 18.0k | int16_t *tmp = tmp_array; | 305 | 18.0k | const pixel *src = (const pixel*)_src; | 306 | 18.0k | pixel *dst = (pixel *)_dst; | 307 | 18.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 18.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 18.0k | const int8_t *filter = hf; | 310 | 18.0k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 18.0k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 18.0k | #if BIT_DEPTH < 14 | 313 | 18.0k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 18.0k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 490k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 12.1M | for (int x = 0; x < width; x++) | 321 | 11.6M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 472k | src += src_stride; | 323 | 472k | tmp += MAX_PB_SIZE; | 324 | 472k | } | 325 | | | 326 | 18.0k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 18.0k | filter = vf; | 328 | 363k | for (int y = 0; y < height; y++) { | 329 | 9.48M | for (int x = 0; x < width; x++) | 330 | 9.14M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 345k | tmp += MAX_PB_SIZE; | 332 | 345k | dst += dst_stride; | 333 | 345k | } | 334 | 18.0k | } |
dsp.c:put_uni_luma_w_hv_10 Line | Count | Source | 302 | 28.7k | { | 303 | 28.7k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 28.7k | int16_t *tmp = tmp_array; | 305 | 28.7k | const pixel *src = (const pixel*)_src; | 306 | 28.7k | pixel *dst = (pixel *)_dst; | 307 | 28.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 28.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 28.7k | const int8_t *filter = hf; | 310 | 28.7k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 28.7k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 28.7k | #if BIT_DEPTH < 14 | 313 | 28.7k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 28.7k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 690k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 15.4M | for (int x = 0; x < width; x++) | 321 | 14.8M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 661k | src += src_stride; | 323 | 661k | tmp += MAX_PB_SIZE; | 324 | 661k | } | 325 | | | 326 | 28.7k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 28.7k | filter = vf; | 328 | 489k | for (int y = 0; y < height; y++) { | 329 | 12.1M | for (int x = 0; x < width; x++) | 330 | 11.7M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 460k | tmp += MAX_PB_SIZE; | 332 | 460k | dst += dst_stride; | 333 | 460k | } | 334 | 28.7k | } |
dsp.c:put_uni_luma_w_hv_12 Line | Count | Source | 302 | 92.4k | { | 303 | 92.4k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 92.4k | int16_t *tmp = tmp_array; | 305 | 92.4k | const pixel *src = (const pixel*)_src; | 306 | 92.4k | pixel *dst = (pixel *)_dst; | 307 | 92.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 92.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 92.4k | const int8_t *filter = hf; | 310 | 92.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 92.4k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 92.4k | #if BIT_DEPTH < 14 | 313 | 92.4k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 92.4k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 1.75M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 25.2M | for (int x = 0; x < width; x++) | 321 | 23.6M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 1.65M | src += src_stride; | 323 | 1.65M | tmp += MAX_PB_SIZE; | 324 | 1.65M | } | 325 | | | 326 | 92.4k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 92.4k | filter = vf; | 328 | 1.10M | for (int y = 0; y < height; y++) { | 329 | 17.6M | for (int x = 0; x < width; x++) | 330 | 16.6M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 1.01M | tmp += MAX_PB_SIZE; | 332 | 1.01M | dst += dst_stride; | 333 | 1.01M | } | 334 | 92.4k | } |
dsp.c:put_uni_luma_w_hv_8 Line | Count | Source | 302 | 86.3k | { | 303 | 86.3k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 86.3k | int16_t *tmp = tmp_array; | 305 | 86.3k | const pixel *src = (const pixel*)_src; | 306 | 86.3k | pixel *dst = (pixel *)_dst; | 307 | 86.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 86.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 86.3k | const int8_t *filter = hf; | 310 | 86.3k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 86.3k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 86.3k | #if BIT_DEPTH < 14 | 313 | 86.3k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 86.3k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 1.83M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 29.2M | for (int x = 0; x < width; x++) | 321 | 27.4M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 1.74M | src += src_stride; | 323 | 1.74M | tmp += MAX_PB_SIZE; | 324 | 1.74M | } | 325 | | | 326 | 86.3k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 86.3k | filter = vf; | 328 | 1.22M | for (int y = 0; y < height; y++) { | 329 | 20.8M | for (int x = 0; x < width; x++) | 330 | 19.6M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 1.13M | tmp += MAX_PB_SIZE; | 332 | 1.13M | dst += dst_stride; | 333 | 1.13M | } | 334 | 86.3k | } |
dsp.c:put_uni_luma_w_hv_12 Line | Count | Source | 302 | 92.4k | { | 303 | 92.4k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 92.4k | int16_t *tmp = tmp_array; | 305 | 92.4k | const pixel *src = (const pixel*)_src; | 306 | 92.4k | pixel *dst = (pixel *)_dst; | 307 | 92.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 92.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 92.4k | const int8_t *filter = hf; | 310 | 92.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 92.4k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 92.4k | #if BIT_DEPTH < 14 | 313 | 92.4k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 92.4k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 1.75M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 25.2M | for (int x = 0; x < width; x++) | 321 | 23.6M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 1.65M | src += src_stride; | 323 | 1.65M | tmp += MAX_PB_SIZE; | 324 | 1.65M | } | 325 | | | 326 | 92.4k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 92.4k | filter = vf; | 328 | 1.10M | for (int y = 0; y < height; y++) { | 329 | 17.6M | for (int x = 0; x < width; x++) | 330 | 16.6M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 1.01M | tmp += MAX_PB_SIZE; | 332 | 1.01M | dst += dst_stride; | 333 | 1.01M | } | 334 | 92.4k | } |
dsp.c:put_uni_luma_w_hv_10 Line | Count | Source | 302 | 28.7k | { | 303 | 28.7k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 28.7k | int16_t *tmp = tmp_array; | 305 | 28.7k | const pixel *src = (const pixel*)_src; | 306 | 28.7k | pixel *dst = (pixel *)_dst; | 307 | 28.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 28.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 28.7k | const int8_t *filter = hf; | 310 | 28.7k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 28.7k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 28.7k | #if BIT_DEPTH < 14 | 313 | 28.7k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 28.7k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 690k | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 15.4M | for (int x = 0; x < width; x++) | 321 | 14.8M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 661k | src += src_stride; | 323 | 661k | tmp += MAX_PB_SIZE; | 324 | 661k | } | 325 | | | 326 | 28.7k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 28.7k | filter = vf; | 328 | 489k | for (int y = 0; y < height; y++) { | 329 | 12.1M | for (int x = 0; x < width; x++) | 330 | 11.7M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 460k | tmp += MAX_PB_SIZE; | 332 | 460k | dst += dst_stride; | 333 | 460k | } | 334 | 28.7k | } |
dsp.c:put_uni_luma_w_hv_8 Line | Count | Source | 302 | 86.3k | { | 303 | 86.3k | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | 304 | 86.3k | int16_t *tmp = tmp_array; | 305 | 86.3k | const pixel *src = (const pixel*)_src; | 306 | 86.3k | pixel *dst = (pixel *)_dst; | 307 | 86.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 308 | 86.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 309 | 86.3k | const int8_t *filter = hf; | 310 | 86.3k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 311 | 86.3k | const int shift = denom + 14 - BIT_DEPTH; | 312 | 86.3k | #if BIT_DEPTH < 14 | 313 | 86.3k | const int offset = 1 << (shift - 1); | 314 | | #else | 315 | | const int offset = 0; | 316 | | #endif | 317 | | | 318 | 86.3k | src -= LUMA_EXTRA_BEFORE * src_stride; | 319 | 1.83M | for (int y = 0; y < height + LUMA_EXTRA; y++) { | 320 | 29.2M | for (int x = 0; x < width; x++) | 321 | 27.4M | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 322 | 1.74M | src += src_stride; | 323 | 1.74M | tmp += MAX_PB_SIZE; | 324 | 1.74M | } | 325 | | | 326 | 86.3k | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | 327 | 86.3k | filter = vf; | 328 | 1.22M | for (int y = 0; y < height; y++) { | 329 | 20.8M | for (int x = 0; x < width; x++) | 330 | 19.6M | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 331 | 1.13M | tmp += MAX_PB_SIZE; | 332 | 1.13M | dst += dst_stride; | 333 | 1.13M | } | 334 | 86.3k | } |
|
335 | | |
336 | | #define CHROMA_FILTER(src, stride) \ |
337 | 1.29G | (filter[0] * src[x - stride] + \ |
338 | 1.29G | filter[1] * src[x] + \ |
339 | 1.29G | filter[2] * src[x + stride] + \ |
340 | 1.29G | filter[3] * src[x + 2 * stride]) |
341 | | |
342 | | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
343 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
344 | 614k | { |
345 | 614k | const pixel *src = (const pixel *)_src; |
346 | 614k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
347 | 614k | const int8_t *filter = hf; |
348 | | |
349 | 7.71M | for (int y = 0; y < height; y++) { |
350 | 123M | for (int x = 0; x < width; x++) |
351 | 116M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
352 | 7.10M | src += src_stride; |
353 | 7.10M | dst += MAX_PB_SIZE; |
354 | 7.10M | } |
355 | 614k | } Line | Count | Source | 344 | 50.0k | { | 345 | 50.0k | const pixel *src = (const pixel *)_src; | 346 | 50.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 50.0k | const int8_t *filter = hf; | 348 | | | 349 | 931k | for (int y = 0; y < height; y++) { | 350 | 13.6M | for (int x = 0; x < width; x++) | 351 | 12.7M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 881k | src += src_stride; | 353 | 881k | dst += MAX_PB_SIZE; | 354 | 881k | } | 355 | 50.0k | } |
Line | Count | Source | 344 | 36.3k | { | 345 | 36.3k | const pixel *src = (const pixel *)_src; | 346 | 36.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 36.3k | const int8_t *filter = hf; | 348 | | | 349 | 661k | for (int y = 0; y < height; y++) { | 350 | 10.0M | for (int x = 0; x < width; x++) | 351 | 9.41M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 625k | src += src_stride; | 353 | 625k | dst += MAX_PB_SIZE; | 354 | 625k | } | 355 | 36.3k | } |
Line | Count | Source | 344 | 132k | { | 345 | 132k | const pixel *src = (const pixel *)_src; | 346 | 132k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 132k | const int8_t *filter = hf; | 348 | | | 349 | 1.64M | for (int y = 0; y < height; y++) { | 350 | 31.9M | for (int x = 0; x < width; x++) | 351 | 30.3M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 1.51M | src += src_stride; | 353 | 1.51M | dst += MAX_PB_SIZE; | 354 | 1.51M | } | 355 | 132k | } |
Line | Count | Source | 344 | 113k | { | 345 | 113k | const pixel *src = (const pixel *)_src; | 346 | 113k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 113k | const int8_t *filter = hf; | 348 | | | 349 | 1.08M | for (int y = 0; y < height; y++) { | 350 | 13.0M | for (int x = 0; x < width; x++) | 351 | 12.0M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 971k | src += src_stride; | 353 | 971k | dst += MAX_PB_SIZE; | 354 | 971k | } | 355 | 113k | } |
Line | Count | Source | 344 | 132k | { | 345 | 132k | const pixel *src = (const pixel *)_src; | 346 | 132k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 132k | const int8_t *filter = hf; | 348 | | | 349 | 1.64M | for (int y = 0; y < height; y++) { | 350 | 31.9M | for (int x = 0; x < width; x++) | 351 | 30.3M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 1.51M | src += src_stride; | 353 | 1.51M | dst += MAX_PB_SIZE; | 354 | 1.51M | } | 355 | 132k | } |
Line | Count | Source | 344 | 36.3k | { | 345 | 36.3k | const pixel *src = (const pixel *)_src; | 346 | 36.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 36.3k | const int8_t *filter = hf; | 348 | | | 349 | 661k | for (int y = 0; y < height; y++) { | 350 | 10.0M | for (int x = 0; x < width; x++) | 351 | 9.41M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 625k | src += src_stride; | 353 | 625k | dst += MAX_PB_SIZE; | 354 | 625k | } | 355 | 36.3k | } |
Line | Count | Source | 344 | 113k | { | 345 | 113k | const pixel *src = (const pixel *)_src; | 346 | 113k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 347 | 113k | const int8_t *filter = hf; | 348 | | | 349 | 1.08M | for (int y = 0; y < height; y++) { | 350 | 13.0M | for (int x = 0; x < width; x++) | 351 | 12.0M | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 352 | 971k | src += src_stride; | 353 | 971k | dst += MAX_PB_SIZE; | 354 | 971k | } | 355 | 113k | } |
|
356 | | |
357 | | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
358 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
359 | 487k | { |
360 | 487k | const pixel *src = (const pixel *)_src; |
361 | 487k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
362 | 487k | const int8_t *filter = vf; |
363 | | |
364 | 6.44M | for (int y = 0; y < height; y++) { |
365 | 113M | for (int x = 0; x < width; x++) |
366 | 107M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); |
367 | 5.95M | src += src_stride; |
368 | 5.95M | dst += MAX_PB_SIZE; |
369 | 5.95M | } |
370 | 487k | } Line | Count | Source | 359 | 65.1k | { | 360 | 65.1k | const pixel *src = (const pixel *)_src; | 361 | 65.1k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 65.1k | const int8_t *filter = vf; | 363 | | | 364 | 1.34M | for (int y = 0; y < height; y++) { | 365 | 21.1M | for (int x = 0; x < width; x++) | 366 | 19.8M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 1.27M | src += src_stride; | 368 | 1.27M | dst += MAX_PB_SIZE; | 369 | 1.27M | } | 370 | 65.1k | } |
Line | Count | Source | 359 | 19.2k | { | 360 | 19.2k | const pixel *src = (const pixel *)_src; | 361 | 19.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 19.2k | const int8_t *filter = vf; | 363 | | | 364 | 261k | for (int y = 0; y < height; y++) { | 365 | 4.68M | for (int x = 0; x < width; x++) | 366 | 4.44M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 242k | src += src_stride; | 368 | 242k | dst += MAX_PB_SIZE; | 369 | 242k | } | 370 | 19.2k | } |
Line | Count | Source | 359 | 104k | { | 360 | 104k | const pixel *src = (const pixel *)_src; | 361 | 104k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 104k | const int8_t *filter = vf; | 363 | | | 364 | 1.42M | for (int y = 0; y < height; y++) { | 365 | 30.1M | for (int x = 0; x < width; x++) | 366 | 28.8M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 1.31M | src += src_stride; | 368 | 1.31M | dst += MAX_PB_SIZE; | 369 | 1.31M | } | 370 | 104k | } |
Line | Count | Source | 359 | 87.8k | { | 360 | 87.8k | const pixel *src = (const pixel *)_src; | 361 | 87.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 87.8k | const int8_t *filter = vf; | 363 | | | 364 | 866k | for (int y = 0; y < height; y++) { | 365 | 11.2M | for (int x = 0; x < width; x++) | 366 | 10.4M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 778k | src += src_stride; | 368 | 778k | dst += MAX_PB_SIZE; | 369 | 778k | } | 370 | 87.8k | } |
Line | Count | Source | 359 | 104k | { | 360 | 104k | const pixel *src = (const pixel *)_src; | 361 | 104k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 104k | const int8_t *filter = vf; | 363 | | | 364 | 1.42M | for (int y = 0; y < height; y++) { | 365 | 30.1M | for (int x = 0; x < width; x++) | 366 | 28.8M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 1.31M | src += src_stride; | 368 | 1.31M | dst += MAX_PB_SIZE; | 369 | 1.31M | } | 370 | 104k | } |
Line | Count | Source | 359 | 19.2k | { | 360 | 19.2k | const pixel *src = (const pixel *)_src; | 361 | 19.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 19.2k | const int8_t *filter = vf; | 363 | | | 364 | 261k | for (int y = 0; y < height; y++) { | 365 | 4.68M | for (int x = 0; x < width; x++) | 366 | 4.44M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 242k | src += src_stride; | 368 | 242k | dst += MAX_PB_SIZE; | 369 | 242k | } | 370 | 19.2k | } |
Line | Count | Source | 359 | 87.8k | { | 360 | 87.8k | const pixel *src = (const pixel *)_src; | 361 | 87.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 362 | 87.8k | const int8_t *filter = vf; | 363 | | | 364 | 866k | for (int y = 0; y < height; y++) { | 365 | 11.2M | for (int x = 0; x < width; x++) | 366 | 10.4M | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | 367 | 778k | src += src_stride; | 368 | 778k | dst += MAX_PB_SIZE; | 369 | 778k | } | 370 | 87.8k | } |
|
371 | | |
372 | | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
373 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
374 | 1.61M | { |
375 | 1.61M | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; |
376 | 1.61M | int16_t *tmp = tmp_array; |
377 | 1.61M | const pixel *src = (const pixel *)_src; |
378 | 1.61M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
379 | 1.61M | const int8_t *filter = hf; |
380 | | |
381 | 1.61M | src -= CHROMA_EXTRA_BEFORE * src_stride; |
382 | | |
383 | 24.3M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 | 377M | for (int x = 0; x < width; x++) |
385 | 354M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
386 | 22.7M | src += src_stride; |
387 | 22.7M | tmp += MAX_PB_SIZE; |
388 | 22.7M | } |
389 | | |
390 | 1.61M | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; |
391 | 1.61M | filter = vf; |
392 | | |
393 | 19.5M | for (int y = 0; y < height; y++) { |
394 | 323M | for (int x = 0; x < width; x++) |
395 | 306M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; |
396 | 17.9M | tmp += MAX_PB_SIZE; |
397 | 17.9M | dst += MAX_PB_SIZE; |
398 | 17.9M | } |
399 | 1.61M | } Line | Count | Source | 374 | 143k | { | 375 | 143k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 143k | int16_t *tmp = tmp_array; | 377 | 143k | const pixel *src = (const pixel *)_src; | 378 | 143k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 143k | const int8_t *filter = hf; | 380 | | | 381 | 143k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 3.52M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 52.9M | for (int x = 0; x < width; x++) | 385 | 49.5M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 3.38M | src += src_stride; | 387 | 3.38M | tmp += MAX_PB_SIZE; | 388 | 3.38M | } | 389 | | | 390 | 143k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 143k | filter = vf; | 392 | | | 393 | 3.09M | for (int y = 0; y < height; y++) { | 394 | 47.3M | for (int x = 0; x < width; x++) | 395 | 44.4M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 2.95M | tmp += MAX_PB_SIZE; | 397 | 2.95M | dst += MAX_PB_SIZE; | 398 | 2.95M | } | 399 | 143k | } |
Line | Count | Source | 374 | 69.9k | { | 375 | 69.9k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 69.9k | int16_t *tmp = tmp_array; | 377 | 69.9k | const pixel *src = (const pixel *)_src; | 378 | 69.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 69.9k | const int8_t *filter = hf; | 380 | | | 381 | 69.9k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 1.18M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 19.4M | for (int x = 0; x < width; x++) | 385 | 18.3M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 1.11M | src += src_stride; | 387 | 1.11M | tmp += MAX_PB_SIZE; | 388 | 1.11M | } | 389 | | | 390 | 69.9k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 69.9k | filter = vf; | 392 | | | 393 | 976k | for (int y = 0; y < height; y++) { | 394 | 16.9M | for (int x = 0; x < width; x++) | 395 | 16.0M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 906k | tmp += MAX_PB_SIZE; | 397 | 906k | dst += MAX_PB_SIZE; | 398 | 906k | } | 399 | 69.9k | } |
Line | Count | Source | 374 | 304k | { | 375 | 304k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 304k | int16_t *tmp = tmp_array; | 377 | 304k | const pixel *src = (const pixel *)_src; | 378 | 304k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 304k | const int8_t *filter = hf; | 380 | | | 381 | 304k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 4.88M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 96.7M | for (int x = 0; x < width; x++) | 385 | 92.2M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 4.57M | src += src_stride; | 387 | 4.57M | tmp += MAX_PB_SIZE; | 388 | 4.57M | } | 389 | | | 390 | 304k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 304k | filter = vf; | 392 | | | 393 | 3.96M | for (int y = 0; y < height; y++) { | 394 | 85.0M | for (int x = 0; x < width; x++) | 395 | 81.3M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 3.66M | tmp += MAX_PB_SIZE; | 397 | 3.66M | dst += MAX_PB_SIZE; | 398 | 3.66M | } | 399 | 304k | } |
Line | Count | Source | 374 | 359k | { | 375 | 359k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 359k | int16_t *tmp = tmp_array; | 377 | 359k | const pixel *src = (const pixel *)_src; | 378 | 359k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 359k | const int8_t *filter = hf; | 380 | | | 381 | 359k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 4.35M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 45.7M | for (int x = 0; x < width; x++) | 385 | 41.8M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 3.99M | src += src_stride; | 387 | 3.99M | tmp += MAX_PB_SIZE; | 388 | 3.99M | } | 389 | | | 390 | 359k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 359k | filter = vf; | 392 | | | 393 | 3.27M | for (int y = 0; y < height; y++) { | 394 | 36.3M | for (int x = 0; x < width; x++) | 395 | 33.4M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 2.91M | tmp += MAX_PB_SIZE; | 397 | 2.91M | dst += MAX_PB_SIZE; | 398 | 2.91M | } | 399 | 359k | } |
Line | Count | Source | 374 | 304k | { | 375 | 304k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 304k | int16_t *tmp = tmp_array; | 377 | 304k | const pixel *src = (const pixel *)_src; | 378 | 304k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 304k | const int8_t *filter = hf; | 380 | | | 381 | 304k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 4.88M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 96.7M | for (int x = 0; x < width; x++) | 385 | 92.2M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 4.57M | src += src_stride; | 387 | 4.57M | tmp += MAX_PB_SIZE; | 388 | 4.57M | } | 389 | | | 390 | 304k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 304k | filter = vf; | 392 | | | 393 | 3.96M | for (int y = 0; y < height; y++) { | 394 | 85.0M | for (int x = 0; x < width; x++) | 395 | 81.3M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 3.66M | tmp += MAX_PB_SIZE; | 397 | 3.66M | dst += MAX_PB_SIZE; | 398 | 3.66M | } | 399 | 304k | } |
Line | Count | Source | 374 | 69.9k | { | 375 | 69.9k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 69.9k | int16_t *tmp = tmp_array; | 377 | 69.9k | const pixel *src = (const pixel *)_src; | 378 | 69.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 69.9k | const int8_t *filter = hf; | 380 | | | 381 | 69.9k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 1.18M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 19.4M | for (int x = 0; x < width; x++) | 385 | 18.3M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 1.11M | src += src_stride; | 387 | 1.11M | tmp += MAX_PB_SIZE; | 388 | 1.11M | } | 389 | | | 390 | 69.9k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 69.9k | filter = vf; | 392 | | | 393 | 976k | for (int y = 0; y < height; y++) { | 394 | 16.9M | for (int x = 0; x < width; x++) | 395 | 16.0M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 906k | tmp += MAX_PB_SIZE; | 397 | 906k | dst += MAX_PB_SIZE; | 398 | 906k | } | 399 | 69.9k | } |
Line | Count | Source | 374 | 359k | { | 375 | 359k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 376 | 359k | int16_t *tmp = tmp_array; | 377 | 359k | const pixel *src = (const pixel *)_src; | 378 | 359k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 379 | 359k | const int8_t *filter = hf; | 380 | | | 381 | 359k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 382 | | | 383 | 4.35M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 384 | 45.7M | for (int x = 0; x < width; x++) | 385 | 41.8M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 386 | 3.99M | src += src_stride; | 387 | 3.99M | tmp += MAX_PB_SIZE; | 388 | 3.99M | } | 389 | | | 390 | 359k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 391 | 359k | filter = vf; | 392 | | | 393 | 3.27M | for (int y = 0; y < height; y++) { | 394 | 36.3M | for (int x = 0; x < width; x++) | 395 | 33.4M | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | 396 | 2.91M | tmp += MAX_PB_SIZE; | 397 | 2.91M | dst += MAX_PB_SIZE; | 398 | 2.91M | } | 399 | 359k | } |
|
400 | | |
/*
 * Horizontal-only chroma filter that writes clipped pixels straight to _dst.
 *
 * _dst, _dst_stride: destination buffer and its stride; the stride is divided
 *                    by sizeof(pixel) before being used to step dst.
 * _src, _src_stride: source buffer and its stride, handled the same way.
 * height, width:     number of rows and columns produced.
 * hf:                filter taps used by this variant.
 * vf:                not read in this function.
 *
 * CHROMA_FILTER is defined outside this excerpt; it presumably applies the
 * taps in the local `filter` around src[x] with an element step of 1.
 * TODO confirm against the macro definition.
 */
401 | | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
402 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
403 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
404 | 1.00M | { |
405 | 1.00M | const pixel *src = (const pixel *)_src; |
406 | 1.00M | pixel *dst = (pixel *)_dst; |
407 | 1.00M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
408 | 1.00M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
409 | 1.00M | const int8_t *filter = hf; |
410 | 1.00M | const int shift = 14 - BIT_DEPTH; |
/* Rounding term for the final right shift; the #else branch avoids evaluating 1 << (shift - 1) when shift would be 0. */
411 | 1.00M | #if BIT_DEPTH < 14 |
412 | 1.00M | const int offset = 1 << (shift - 1); |
413 | | #else |
414 | | const int offset = 0; |
415 | | #endif |
416 | | |
/* One output row per iteration: filter, drop (BIT_DEPTH - 8) bits, round, shift, then clip to the pixel range. */
417 | 9.37M | for (int y = 0; y < height; y++) { |
418 | 135M | for (int x = 0; x < width; x++) |
419 | 126M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); |
420 | 8.37M | src += src_stride; |
421 | 8.37M | dst += dst_stride; |
422 | 8.37M | } |
423 | 1.00M | } Line | Count | Source | 404 | 26.3k | { | 405 | 26.3k | const pixel *src = (const pixel *)_src; | 406 | 26.3k | pixel *dst = (pixel *)_dst; | 407 | 26.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 26.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 26.3k | const int8_t *filter = hf; | 410 | 26.3k | const int shift = 14 - BIT_DEPTH; | 411 | 26.3k | #if BIT_DEPTH < 14 | 412 | 26.3k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 320k | for (int y = 0; y < height; y++) { | 418 | 3.85M | for (int x = 0; x < width; x++) | 419 | 3.56M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 294k | src += src_stride; | 421 | 294k | dst += dst_stride; | 422 | 294k | } | 423 | 26.3k | } |
dsp.c:put_uni_chroma_h_10 Line | Count | Source | 404 | 35.0k | { | 405 | 35.0k | const pixel *src = (const pixel *)_src; | 406 | 35.0k | pixel *dst = (pixel *)_dst; | 407 | 35.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 35.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 35.0k | const int8_t *filter = hf; | 410 | 35.0k | const int shift = 14 - BIT_DEPTH; | 411 | 35.0k | #if BIT_DEPTH < 14 | 412 | 35.0k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 268k | for (int y = 0; y < height; y++) { | 418 | 2.08M | for (int x = 0; x < width; x++) | 419 | 1.85M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 233k | src += src_stride; | 421 | 233k | dst += dst_stride; | 422 | 233k | } | 423 | 35.0k | } |
dsp.c:put_uni_chroma_h_12 Line | Count | Source | 404 | 193k | { | 405 | 193k | const pixel *src = (const pixel *)_src; | 406 | 193k | pixel *dst = (pixel *)_dst; | 407 | 193k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 193k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 193k | const int8_t *filter = hf; | 410 | 193k | const int shift = 14 - BIT_DEPTH; | 411 | 193k | #if BIT_DEPTH < 14 | 412 | 193k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 2.34M | for (int y = 0; y < height; y++) { | 418 | 44.4M | for (int x = 0; x < width; x++) | 419 | 42.3M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 2.14M | src += src_stride; | 421 | 2.14M | dst += dst_stride; | 422 | 2.14M | } | 423 | 193k | } |
Line | Count | Source | 404 | 259k | { | 405 | 259k | const pixel *src = (const pixel *)_src; | 406 | 259k | pixel *dst = (pixel *)_dst; | 407 | 259k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 259k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 259k | const int8_t *filter = hf; | 410 | 259k | const int shift = 14 - BIT_DEPTH; | 411 | 259k | #if BIT_DEPTH < 14 | 412 | 259k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 1.91M | for (int y = 0; y < height; y++) { | 418 | 19.1M | for (int x = 0; x < width; x++) | 419 | 17.4M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 1.65M | src += src_stride; | 421 | 1.65M | dst += dst_stride; | 422 | 1.65M | } | 423 | 259k | } |
dsp.c:put_uni_chroma_h_12 Line | Count | Source | 404 | 193k | { | 405 | 193k | const pixel *src = (const pixel *)_src; | 406 | 193k | pixel *dst = (pixel *)_dst; | 407 | 193k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 193k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 193k | const int8_t *filter = hf; | 410 | 193k | const int shift = 14 - BIT_DEPTH; | 411 | 193k | #if BIT_DEPTH < 14 | 412 | 193k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 2.34M | for (int y = 0; y < height; y++) { | 418 | 44.4M | for (int x = 0; x < width; x++) | 419 | 42.3M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 2.14M | src += src_stride; | 421 | 2.14M | dst += dst_stride; | 422 | 2.14M | } | 423 | 193k | } |
dsp.c:put_uni_chroma_h_10 Line | Count | Source | 404 | 35.0k | { | 405 | 35.0k | const pixel *src = (const pixel *)_src; | 406 | 35.0k | pixel *dst = (pixel *)_dst; | 407 | 35.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 35.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 35.0k | const int8_t *filter = hf; | 410 | 35.0k | const int shift = 14 - BIT_DEPTH; | 411 | 35.0k | #if BIT_DEPTH < 14 | 412 | 35.0k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 268k | for (int y = 0; y < height; y++) { | 418 | 2.08M | for (int x = 0; x < width; x++) | 419 | 1.85M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 233k | src += src_stride; | 421 | 233k | dst += dst_stride; | 422 | 233k | } | 423 | 35.0k | } |
Line | Count | Source | 404 | 259k | { | 405 | 259k | const pixel *src = (const pixel *)_src; | 406 | 259k | pixel *dst = (pixel *)_dst; | 407 | 259k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 408 | 259k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 409 | 259k | const int8_t *filter = hf; | 410 | 259k | const int shift = 14 - BIT_DEPTH; | 411 | 259k | #if BIT_DEPTH < 14 | 412 | 259k | const int offset = 1 << (shift - 1); | 413 | | #else | 414 | | const int offset = 0; | 415 | | #endif | 416 | | | 417 | 1.91M | for (int y = 0; y < height; y++) { | 418 | 19.1M | for (int x = 0; x < width; x++) | 419 | 17.4M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | 420 | 1.65M | src += src_stride; | 421 | 1.65M | dst += dst_stride; | 422 | 1.65M | } | 423 | 259k | } |
|
424 | | |
/*
 * Vertical-only chroma filter that writes clipped pixels straight to _dst.
 *
 * _dst, _dst_stride: destination buffer and its stride; the stride is divided
 *                    by sizeof(pixel) before being used to step dst.
 * _src, _src_stride: source buffer and its stride, handled the same way.
 * height, width:     number of rows and columns produced.
 * hf:                not read in this function.
 * vf:                filter taps used by this variant.
 *
 * CHROMA_FILTER is defined outside this excerpt; it presumably applies the
 * taps in the local `filter` around src[x], stepping by src_stride elements
 * so that neighbouring rows are combined. TODO confirm against the macro
 * definition, including how many rows above and below the block it reads.
 */
425 | | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
426 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
427 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
428 | 1.03M | { |
429 | 1.03M | const pixel *src = (const pixel *)_src; |
430 | 1.03M | pixel *dst = (pixel *)_dst; |
431 | 1.03M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
432 | 1.03M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
433 | 1.03M | const int8_t *filter = vf; |
434 | 1.03M | const int shift = 14 - BIT_DEPTH; |
/* Rounding term for the final right shift; the #else branch avoids evaluating 1 << (shift - 1) when shift would be 0. */
435 | 1.03M | #if BIT_DEPTH < 14 |
436 | 1.03M | const int offset = 1 << (shift - 1); |
437 | | #else |
438 | | const int offset = 0; |
439 | | #endif |
440 | | |
/* Same per-sample arithmetic as the horizontal variant; only the filter taps (vf) and the macro step (src_stride) differ. */
441 | 8.45M | for (int y = 0; y < height; y++) { |
442 | 89.8M | for (int x = 0; x < width; x++) |
443 | 82.4M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); |
444 | 7.42M | src += src_stride; |
445 | 7.42M | dst += dst_stride; |
446 | 7.42M | } |
447 | 1.03M | } Line | Count | Source | 428 | 38.5k | { | 429 | 38.5k | const pixel *src = (const pixel *)_src; | 430 | 38.5k | pixel *dst = (pixel *)_dst; | 431 | 38.5k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 38.5k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 38.5k | const int8_t *filter = vf; | 434 | 38.5k | const int shift = 14 - BIT_DEPTH; | 435 | 38.5k | #if BIT_DEPTH < 14 | 436 | 38.5k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 307k | for (int y = 0; y < height; y++) { | 442 | 2.89M | for (int x = 0; x < width; x++) | 443 | 2.62M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 268k | src += src_stride; | 445 | 268k | dst += dst_stride; | 446 | 268k | } | 447 | 38.5k | } |
dsp.c:put_uni_chroma_v_10 Line | Count | Source | 428 | 62.7k | { | 429 | 62.7k | const pixel *src = (const pixel *)_src; | 430 | 62.7k | pixel *dst = (pixel *)_dst; | 431 | 62.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 62.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 62.7k | const int8_t *filter = vf; | 434 | 62.7k | const int shift = 14 - BIT_DEPTH; | 435 | 62.7k | #if BIT_DEPTH < 14 | 436 | 62.7k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 389k | for (int y = 0; y < height; y++) { | 442 | 2.88M | for (int x = 0; x < width; x++) | 443 | 2.56M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 327k | src += src_stride; | 445 | 327k | dst += dst_stride; | 446 | 327k | } | 447 | 62.7k | } |
dsp.c:put_uni_chroma_v_12 Line | Count | Source | 428 | 238k | { | 429 | 238k | const pixel *src = (const pixel *)_src; | 430 | 238k | pixel *dst = (pixel *)_dst; | 431 | 238k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 238k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 238k | const int8_t *filter = vf; | 434 | 238k | const int shift = 14 - BIT_DEPTH; | 435 | 238k | #if BIT_DEPTH < 14 | 436 | 238k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 2.33M | for (int y = 0; y < height; y++) { | 442 | 27.8M | for (int x = 0; x < width; x++) | 443 | 25.7M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 2.09M | src += src_stride; | 445 | 2.09M | dst += dst_stride; | 446 | 2.09M | } | 447 | 238k | } |
Line | Count | Source | 428 | 196k | { | 429 | 196k | const pixel *src = (const pixel *)_src; | 430 | 196k | pixel *dst = (pixel *)_dst; | 431 | 196k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 196k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 196k | const int8_t *filter = vf; | 434 | 196k | const int shift = 14 - BIT_DEPTH; | 435 | 196k | #if BIT_DEPTH < 14 | 436 | 196k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 1.35M | for (int y = 0; y < height; y++) { | 442 | 12.7M | for (int x = 0; x < width; x++) | 443 | 11.5M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 1.15M | src += src_stride; | 445 | 1.15M | dst += dst_stride; | 446 | 1.15M | } | 447 | 196k | } |
dsp.c:put_uni_chroma_v_12 Line | Count | Source | 428 | 238k | { | 429 | 238k | const pixel *src = (const pixel *)_src; | 430 | 238k | pixel *dst = (pixel *)_dst; | 431 | 238k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 238k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 238k | const int8_t *filter = vf; | 434 | 238k | const int shift = 14 - BIT_DEPTH; | 435 | 238k | #if BIT_DEPTH < 14 | 436 | 238k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 2.33M | for (int y = 0; y < height; y++) { | 442 | 27.8M | for (int x = 0; x < width; x++) | 443 | 25.7M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 2.09M | src += src_stride; | 445 | 2.09M | dst += dst_stride; | 446 | 2.09M | } | 447 | 238k | } |
dsp.c:put_uni_chroma_v_10 Line | Count | Source | 428 | 62.7k | { | 429 | 62.7k | const pixel *src = (const pixel *)_src; | 430 | 62.7k | pixel *dst = (pixel *)_dst; | 431 | 62.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 62.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 62.7k | const int8_t *filter = vf; | 434 | 62.7k | const int shift = 14 - BIT_DEPTH; | 435 | 62.7k | #if BIT_DEPTH < 14 | 436 | 62.7k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 389k | for (int y = 0; y < height; y++) { | 442 | 2.88M | for (int x = 0; x < width; x++) | 443 | 2.56M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 327k | src += src_stride; | 445 | 327k | dst += dst_stride; | 446 | 327k | } | 447 | 62.7k | } |
Line | Count | Source | 428 | 196k | { | 429 | 196k | const pixel *src = (const pixel *)_src; | 430 | 196k | pixel *dst = (pixel *)_dst; | 431 | 196k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 432 | 196k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 433 | 196k | const int8_t *filter = vf; | 434 | 196k | const int shift = 14 - BIT_DEPTH; | 435 | 196k | #if BIT_DEPTH < 14 | 436 | 196k | const int offset = 1 << (shift - 1); | 437 | | #else | 438 | | const int offset = 0; | 439 | | #endif | 440 | | | 441 | 1.35M | for (int y = 0; y < height; y++) { | 442 | 12.7M | for (int x = 0; x < width; x++) | 443 | 11.5M | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | 444 | 1.15M | src += src_stride; | 445 | 1.15M | dst += dst_stride; | 446 | 1.15M | } | 447 | 196k | } |
|
448 | | |
/*
 * Two-pass (horizontal then vertical) chroma filter that writes clipped
 * pixels to _dst.
 *
 * _dst, _dst_stride: destination buffer and its stride; the stride is divided
 *                    by sizeof(pixel) before being used to step dst.
 * _src, _src_stride: source buffer and its stride, handled the same way.
 * height, width:     number of rows and columns produced.
 * hf:                taps used for the first (horizontal) pass.
 * vf:                taps used for the second (vertical) pass.
 *
 * The first pass stores its results in a local int16_t buffer whose rows are
 * MAX_PB_SIZE elements apart. It produces height + CHROMA_EXTRA rows, starting
 * CHROMA_EXTRA_BEFORE rows above the block, so the second pass has the extra
 * rows it reads.
 *
 * CHROMA_FILTER is defined outside this excerpt; it presumably applies the
 * taps in the local `filter` around element x of its first argument, using
 * the second argument as the element step. TODO confirm against the macro
 * definition. NOTE(review): the buffer size assumes height and width do not
 * exceed MAX_PB_SIZE; nothing in this function checks that.
 */
449 | | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
450 | | const uint8_t *_src, const ptrdiff_t _src_stride, |
451 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
452 | 3.17M | { |
453 | 3.17M | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; |
454 | 3.17M | int16_t *tmp = tmp_array; |
455 | 3.17M | const pixel *src = (const pixel *)_src; |
456 | 3.17M | pixel *dst = (pixel *)_dst; |
457 | 3.17M | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
458 | 3.17M | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
459 | 3.17M | const int8_t *filter = hf; |
460 | 3.17M | const int shift = 14 - BIT_DEPTH; |
/* Rounding term for the final right shift; the #else branch avoids evaluating 1 << (shift - 1) when shift would be 0. */
461 | 3.17M | #if BIT_DEPTH < 14 |
462 | 3.17M | const int offset = 1 << (shift - 1); |
463 | | #else |
464 | | const int offset = 0; |
465 | | #endif |
466 | | |
/* Step back so the first pass also covers the rows above the block. */
467 | 3.17M | src -= CHROMA_EXTRA_BEFORE * src_stride; |
468 | | |
/* Pass 1: horizontal filtering of height + CHROMA_EXTRA rows into the temporary buffer. */
469 | 34.7M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 | 342M | for (int x = 0; x < width; x++) |
471 | 311M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
472 | 31.6M | src += src_stride; |
473 | 31.6M | tmp += MAX_PB_SIZE; |
474 | 31.6M | } |
475 | | |
/* Rewind to the temporary row that corresponds to the first output row and switch to the vertical taps. */
476 | 3.17M | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; |
477 | 3.17M | filter = vf; |
478 | | |
/* Pass 2: vertical filtering over the temporary rows, then >> 6, round, shift and clip into dst. */
479 | 25.2M | for (int y = 0; y < height; y++) { |
480 | 272M | for (int x = 0; x < width; x++) |
481 | 249M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); |
482 | 22.1M | tmp += MAX_PB_SIZE; |
483 | 22.1M | dst += dst_stride; |
484 | 22.1M | } |
485 | 3.17M | } dsp.c:put_uni_chroma_hv_9 Line | Count | Source | 452 | 74.7k | { | 453 | 74.7k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 74.7k | int16_t *tmp = tmp_array; | 455 | 74.7k | const pixel *src = (const pixel *)_src; | 456 | 74.7k | pixel *dst = (pixel *)_dst; | 457 | 74.7k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 74.7k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 74.7k | const int8_t *filter = hf; | 460 | 74.7k | const int shift = 14 - BIT_DEPTH; | 461 | 74.7k | #if BIT_DEPTH < 14 | 462 | 74.7k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 74.7k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 963k | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 9.33M | for (int x = 0; x < width; x++) | 471 | 8.44M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 889k | src += src_stride; | 473 | 889k | tmp += MAX_PB_SIZE; | 474 | 889k | } | 475 | | | 476 | 74.7k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 74.7k | filter = vf; | 478 | | | 479 | 739k | for (int y = 0; y < height; y++) { | 480 | 7.72M | for (int x = 0; x < width; x++) | 481 | 7.06M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 664k | tmp += MAX_PB_SIZE; | 483 | 664k | dst += dst_stride; | 484 | 664k | } | 485 | 74.7k | } |
dsp.c:put_uni_chroma_hv_10 Line | Count | Source | 452 | 77.3k | { | 453 | 77.3k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 77.3k | int16_t *tmp = tmp_array; | 455 | 77.3k | const pixel *src = (const pixel *)_src; | 456 | 77.3k | pixel *dst = (pixel *)_dst; | 457 | 77.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 77.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 77.3k | const int8_t *filter = hf; | 460 | 77.3k | const int shift = 14 - BIT_DEPTH; | 461 | 77.3k | #if BIT_DEPTH < 14 | 462 | 77.3k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 77.3k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 766k | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 4.83M | for (int x = 0; x < width; x++) | 471 | 4.14M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 689k | src += src_stride; | 473 | 689k | tmp += MAX_PB_SIZE; | 474 | 689k | } | 475 | | | 476 | 77.3k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 77.3k | filter = vf; | 478 | | | 479 | 534k | for (int y = 0; y < height; y++) { | 480 | 3.49M | for (int x = 0; x < width; x++) | 481 | 3.04M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 457k | tmp += MAX_PB_SIZE; | 483 | 457k | dst += dst_stride; | 484 | 457k | } | 485 | 77.3k | } |
dsp.c:put_uni_chroma_hv_12 Line | Count | Source | 452 | 563k | { | 453 | 563k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 563k | int16_t *tmp = tmp_array; | 455 | 563k | const pixel *src = (const pixel *)_src; | 456 | 563k | pixel *dst = (pixel *)_dst; | 457 | 563k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 563k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 563k | const int8_t *filter = hf; | 460 | 563k | const int shift = 14 - BIT_DEPTH; | 461 | 563k | #if BIT_DEPTH < 14 | 462 | 563k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 563k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 7.50M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 96.6M | for (int x = 0; x < width; x++) | 471 | 89.7M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 6.93M | src += src_stride; | 473 | 6.93M | tmp += MAX_PB_SIZE; | 474 | 6.93M | } | 475 | | | 476 | 563k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 563k | filter = vf; | 478 | | | 479 | 5.80M | for (int y = 0; y < height; y++) { | 480 | 80.4M | for (int x = 0; x < width; x++) | 481 | 75.2M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 5.24M | tmp += MAX_PB_SIZE; | 483 | 5.24M | dst += dst_stride; | 484 | 5.24M | } | 485 | 563k | } |
dsp.c:put_uni_chroma_hv_8 Line | Count | Source | 452 | 907k | { | 453 | 907k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 907k | int16_t *tmp = tmp_array; | 455 | 907k | const pixel *src = (const pixel *)_src; | 456 | 907k | pixel *dst = (pixel *)_dst; | 457 | 907k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 907k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 907k | const int8_t *filter = hf; | 460 | 907k | const int shift = 14 - BIT_DEPTH; | 461 | 907k | #if BIT_DEPTH < 14 | 462 | 907k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 907k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 8.65M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 65.2M | for (int x = 0; x < width; x++) | 471 | 57.4M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 7.74M | src += src_stride; | 473 | 7.74M | tmp += MAX_PB_SIZE; | 474 | 7.74M | } | 475 | | | 476 | 907k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 907k | filter = vf; | 478 | | | 479 | 5.92M | for (int y = 0; y < height; y++) { | 480 | 48.1M | for (int x = 0; x < width; x++) | 481 | 43.1M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 5.02M | tmp += MAX_PB_SIZE; | 483 | 5.02M | dst += dst_stride; | 484 | 5.02M | } | 485 | 907k | } |
dsp.c:put_uni_chroma_hv_12 Line | Count | Source | 452 | 563k | { | 453 | 563k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 563k | int16_t *tmp = tmp_array; | 455 | 563k | const pixel *src = (const pixel *)_src; | 456 | 563k | pixel *dst = (pixel *)_dst; | 457 | 563k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 563k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 563k | const int8_t *filter = hf; | 460 | 563k | const int shift = 14 - BIT_DEPTH; | 461 | 563k | #if BIT_DEPTH < 14 | 462 | 563k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 563k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 7.50M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 96.6M | for (int x = 0; x < width; x++) | 471 | 89.7M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 6.93M | src += src_stride; | 473 | 6.93M | tmp += MAX_PB_SIZE; | 474 | 6.93M | } | 475 | | | 476 | 563k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 563k | filter = vf; | 478 | | | 479 | 5.80M | for (int y = 0; y < height; y++) { | 480 | 80.4M | for (int x = 0; x < width; x++) | 481 | 75.2M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 5.24M | tmp += MAX_PB_SIZE; | 483 | 5.24M | dst += dst_stride; | 484 | 5.24M | } | 485 | 563k | } |
dsp.c:put_uni_chroma_hv_10 Line | Count | Source | 452 | 77.3k | { | 453 | 77.3k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 77.3k | int16_t *tmp = tmp_array; | 455 | 77.3k | const pixel *src = (const pixel *)_src; | 456 | 77.3k | pixel *dst = (pixel *)_dst; | 457 | 77.3k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 77.3k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 77.3k | const int8_t *filter = hf; | 460 | 77.3k | const int shift = 14 - BIT_DEPTH; | 461 | 77.3k | #if BIT_DEPTH < 14 | 462 | 77.3k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 77.3k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 766k | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 4.83M | for (int x = 0; x < width; x++) | 471 | 4.14M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 689k | src += src_stride; | 473 | 689k | tmp += MAX_PB_SIZE; | 474 | 689k | } | 475 | | | 476 | 77.3k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 77.3k | filter = vf; | 478 | | | 479 | 534k | for (int y = 0; y < height; y++) { | 480 | 3.49M | for (int x = 0; x < width; x++) | 481 | 3.04M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 457k | tmp += MAX_PB_SIZE; | 483 | 457k | dst += dst_stride; | 484 | 457k | } | 485 | 77.3k | } |
dsp.c:put_uni_chroma_hv_8 Line | Count | Source | 452 | 907k | { | 453 | 907k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 454 | 907k | int16_t *tmp = tmp_array; | 455 | 907k | const pixel *src = (const pixel *)_src; | 456 | 907k | pixel *dst = (pixel *)_dst; | 457 | 907k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 458 | 907k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 459 | 907k | const int8_t *filter = hf; | 460 | 907k | const int shift = 14 - BIT_DEPTH; | 461 | 907k | #if BIT_DEPTH < 14 | 462 | 907k | const int offset = 1 << (shift - 1); | 463 | | #else | 464 | | const int offset = 0; | 465 | | #endif | 466 | | | 467 | 907k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 468 | | | 469 | 8.65M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 470 | 65.2M | for (int x = 0; x < width; x++) | 471 | 57.4M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 472 | 7.74M | src += src_stride; | 473 | 7.74M | tmp += MAX_PB_SIZE; | 474 | 7.74M | } | 475 | | | 476 | 907k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 477 | 907k | filter = vf; | 478 | | | 479 | 5.92M | for (int y = 0; y < height; y++) { | 480 | 48.1M | for (int x = 0; x < width; x++) | 481 | 43.1M | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | 482 | 5.02M | tmp += MAX_PB_SIZE; | 483 | 5.02M | dst += dst_stride; | 484 | 5.02M | } | 485 | 907k | } |
|
486 | | |
487 | | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, |
488 | | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, |
489 | | const int8_t *hf, const int8_t *vf, int width) |
490 | 275k | { |
491 | 275k | const pixel *src = (const pixel *)_src; |
492 | 275k | pixel *dst = (pixel *)_dst; |
493 | 275k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
494 | 275k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
495 | 275k | const int8_t *filter = hf; |
496 | 275k | const int shift = denom + 14 - BIT_DEPTH; |
497 | 275k | #if BIT_DEPTH < 14 |
498 | 275k | const int offset = 1 << (shift - 1); |
499 | | #else |
500 | | const int offset = 0; |
501 | | #endif |
502 | | |
503 | 275k | ox = ox * (1 << (BIT_DEPTH - 8)); |
504 | 2.60M | for (int y = 0; y < height; y++) { |
505 | 31.9M | for (int x = 0; x < width; x++) { |
506 | 29.6M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
507 | 29.6M | } |
508 | 2.32M | dst += dst_stride; |
509 | 2.32M | src += src_stride; |
510 | 2.32M | } |
511 | 275k | } dsp.c:put_uni_chroma_w_h_9 Line | Count | Source | 490 | 20.0k | { | 491 | 20.0k | const pixel *src = (const pixel *)_src; | 492 | 20.0k | pixel *dst = (pixel *)_dst; | 493 | 20.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 20.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 20.0k | const int8_t *filter = hf; | 496 | 20.0k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 20.0k | #if BIT_DEPTH < 14 | 498 | 20.0k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 20.0k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 403k | for (int y = 0; y < height; y++) { | 505 | 6.15M | for (int x = 0; x < width; x++) { | 506 | 5.77M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 5.77M | } | 508 | 383k | dst += dst_stride; | 509 | 383k | src += src_stride; | 510 | 383k | } | 511 | 20.0k | } |
dsp.c:put_uni_chroma_w_h_10 Line | Count | Source | 490 | 22.4k | { | 491 | 22.4k | const pixel *src = (const pixel *)_src; | 492 | 22.4k | pixel *dst = (pixel *)_dst; | 493 | 22.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 22.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 22.4k | const int8_t *filter = hf; | 496 | 22.4k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 22.4k | #if BIT_DEPTH < 14 | 498 | 22.4k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 22.4k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 256k | for (int y = 0; y < height; y++) { | 505 | 2.81M | for (int x = 0; x < width; x++) { | 506 | 2.57M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 2.57M | } | 508 | 233k | dst += dst_stride; | 509 | 233k | src += src_stride; | 510 | 233k | } | 511 | 22.4k | } |
dsp.c:put_uni_chroma_w_h_12 Line | Count | Source | 490 | 59.2k | { | 491 | 59.2k | const pixel *src = (const pixel *)_src; | 492 | 59.2k | pixel *dst = (pixel *)_dst; | 493 | 59.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 59.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 59.2k | const int8_t *filter = hf; | 496 | 59.2k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 59.2k | #if BIT_DEPTH < 14 | 498 | 59.2k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 59.2k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 546k | for (int y = 0; y < height; y++) { | 505 | 7.63M | for (int x = 0; x < width; x++) { | 506 | 7.14M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 7.14M | } | 508 | 487k | dst += dst_stride; | 509 | 487k | src += src_stride; | 510 | 487k | } | 511 | 59.2k | } |
dsp.c:put_uni_chroma_w_h_8 Line | Count | Source | 490 | 45.9k | { | 491 | 45.9k | const pixel *src = (const pixel *)_src; | 492 | 45.9k | pixel *dst = (pixel *)_dst; | 493 | 45.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 45.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 45.9k | const int8_t *filter = hf; | 496 | 45.9k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 45.9k | #if BIT_DEPTH < 14 | 498 | 45.9k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 45.9k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 297k | for (int y = 0; y < height; y++) { | 505 | 2.46M | for (int x = 0; x < width; x++) { | 506 | 2.20M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 2.20M | } | 508 | 251k | dst += dst_stride; | 509 | 251k | src += src_stride; | 510 | 251k | } | 511 | 45.9k | } |
dsp.c:put_uni_chroma_w_h_12 Line | Count | Source | 490 | 59.2k | { | 491 | 59.2k | const pixel *src = (const pixel *)_src; | 492 | 59.2k | pixel *dst = (pixel *)_dst; | 493 | 59.2k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 59.2k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 59.2k | const int8_t *filter = hf; | 496 | 59.2k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 59.2k | #if BIT_DEPTH < 14 | 498 | 59.2k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 59.2k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 546k | for (int y = 0; y < height; y++) { | 505 | 7.63M | for (int x = 0; x < width; x++) { | 506 | 7.14M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 7.14M | } | 508 | 487k | dst += dst_stride; | 509 | 487k | src += src_stride; | 510 | 487k | } | 511 | 59.2k | } |
dsp.c:put_uni_chroma_w_h_10 Line | Count | Source | 490 | 22.4k | { | 491 | 22.4k | const pixel *src = (const pixel *)_src; | 492 | 22.4k | pixel *dst = (pixel *)_dst; | 493 | 22.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 22.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 22.4k | const int8_t *filter = hf; | 496 | 22.4k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 22.4k | #if BIT_DEPTH < 14 | 498 | 22.4k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 22.4k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 256k | for (int y = 0; y < height; y++) { | 505 | 2.81M | for (int x = 0; x < width; x++) { | 506 | 2.57M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 2.57M | } | 508 | 233k | dst += dst_stride; | 509 | 233k | src += src_stride; | 510 | 233k | } | 511 | 22.4k | } |
dsp.c:put_uni_chroma_w_h_8 Line | Count | Source | 490 | 45.9k | { | 491 | 45.9k | const pixel *src = (const pixel *)_src; | 492 | 45.9k | pixel *dst = (pixel *)_dst; | 493 | 45.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 494 | 45.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 495 | 45.9k | const int8_t *filter = hf; | 496 | 45.9k | const int shift = denom + 14 - BIT_DEPTH; | 497 | 45.9k | #if BIT_DEPTH < 14 | 498 | 45.9k | const int offset = 1 << (shift - 1); | 499 | | #else | 500 | | const int offset = 0; | 501 | | #endif | 502 | | | 503 | 45.9k | ox = ox * (1 << (BIT_DEPTH - 8)); | 504 | 297k | for (int y = 0; y < height; y++) { | 505 | 2.46M | for (int x = 0; x < width; x++) { | 506 | 2.20M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 507 | 2.20M | } | 508 | 251k | dst += dst_stride; | 509 | 251k | src += src_stride; | 510 | 251k | } | 511 | 45.9k | } |
|
512 | | |
513 | | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
514 | | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, |
515 | | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, |
516 | | const int width) |
517 | 252k | { |
518 | 252k | const pixel *src = (const pixel *)_src; |
519 | 252k | pixel *dst = (pixel *)_dst; |
520 | 252k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
521 | 252k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
522 | 252k | const int8_t *filter = vf; |
523 | 252k | const int shift = denom + 14 - BIT_DEPTH; |
524 | 252k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
525 | 252k | #if BIT_DEPTH < 14 |
526 | 252k | int offset = 1 << (shift - 1); |
527 | | #else |
528 | | int offset = 0; |
529 | | #endif |
530 | | |
531 | 2.32M | for (int y = 0; y < height; y++) { |
532 | 27.2M | for (int x = 0; x < width; x++) { |
533 | 25.2M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
534 | 25.2M | } |
535 | 2.07M | dst += dst_stride; |
536 | 2.07M | src += src_stride; |
537 | 2.07M | } |
538 | 252k | } dsp.c:put_uni_chroma_w_v_9 Line | Count | Source | 517 | 18.9k | { | 518 | 18.9k | const pixel *src = (const pixel *)_src; | 519 | 18.9k | pixel *dst = (pixel *)_dst; | 520 | 18.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 18.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 18.9k | const int8_t *filter = vf; | 523 | 18.9k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 18.9k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 18.9k | #if BIT_DEPTH < 14 | 526 | 18.9k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 307k | for (int y = 0; y < height; y++) { | 532 | 4.69M | for (int x = 0; x < width; x++) { | 533 | 4.41M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 4.41M | } | 535 | 288k | dst += dst_stride; | 536 | 288k | src += src_stride; | 537 | 288k | } | 538 | 18.9k | } |
dsp.c:put_uni_chroma_w_v_10 Line | Count | Source | 517 | 23.9k | { | 518 | 23.9k | const pixel *src = (const pixel *)_src; | 519 | 23.9k | pixel *dst = (pixel *)_dst; | 520 | 23.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 23.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 23.9k | const int8_t *filter = vf; | 523 | 23.9k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 23.9k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 23.9k | #if BIT_DEPTH < 14 | 526 | 23.9k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 257k | for (int y = 0; y < height; y++) { | 532 | 2.57M | for (int x = 0; x < width; x++) { | 533 | 2.34M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 2.34M | } | 535 | 233k | dst += dst_stride; | 536 | 233k | src += src_stride; | 537 | 233k | } | 538 | 23.9k | } |
dsp.c:put_uni_chroma_w_v_12 Line | Count | Source | 517 | 40.4k | { | 518 | 40.4k | const pixel *src = (const pixel *)_src; | 519 | 40.4k | pixel *dst = (pixel *)_dst; | 520 | 40.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 40.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 40.4k | const int8_t *filter = vf; | 523 | 40.4k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 40.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 40.4k | #if BIT_DEPTH < 14 | 526 | 40.4k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 410k | for (int y = 0; y < height; y++) { | 532 | 6.04M | for (int x = 0; x < width; x++) { | 533 | 5.67M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 5.67M | } | 535 | 369k | dst += dst_stride; | 536 | 369k | src += src_stride; | 537 | 369k | } | 538 | 40.4k | } |
dsp.c:put_uni_chroma_w_v_8 Line | Count | Source | 517 | 52.5k | { | 518 | 52.5k | const pixel *src = (const pixel *)_src; | 519 | 52.5k | pixel *dst = (pixel *)_dst; | 520 | 52.5k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 52.5k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 52.5k | const int8_t *filter = vf; | 523 | 52.5k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 52.5k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 52.5k | #if BIT_DEPTH < 14 | 526 | 52.5k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 341k | for (int y = 0; y < height; y++) { | 532 | 2.67M | for (int x = 0; x < width; x++) { | 533 | 2.38M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 2.38M | } | 535 | 288k | dst += dst_stride; | 536 | 288k | src += src_stride; | 537 | 288k | } | 538 | 52.5k | } |
dsp.c:put_uni_chroma_w_v_12 Line | Count | Source | 517 | 40.4k | { | 518 | 40.4k | const pixel *src = (const pixel *)_src; | 519 | 40.4k | pixel *dst = (pixel *)_dst; | 520 | 40.4k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 40.4k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 40.4k | const int8_t *filter = vf; | 523 | 40.4k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 40.4k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 40.4k | #if BIT_DEPTH < 14 | 526 | 40.4k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 410k | for (int y = 0; y < height; y++) { | 532 | 6.04M | for (int x = 0; x < width; x++) { | 533 | 5.67M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 5.67M | } | 535 | 369k | dst += dst_stride; | 536 | 369k | src += src_stride; | 537 | 369k | } | 538 | 40.4k | } |
dsp.c:put_uni_chroma_w_v_10 Line | Count | Source | 517 | 23.9k | { | 518 | 23.9k | const pixel *src = (const pixel *)_src; | 519 | 23.9k | pixel *dst = (pixel *)_dst; | 520 | 23.9k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 23.9k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 23.9k | const int8_t *filter = vf; | 523 | 23.9k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 23.9k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 23.9k | #if BIT_DEPTH < 14 | 526 | 23.9k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 257k | for (int y = 0; y < height; y++) { | 532 | 2.57M | for (int x = 0; x < width; x++) { | 533 | 2.34M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 2.34M | } | 535 | 233k | dst += dst_stride; | 536 | 233k | src += src_stride; | 537 | 233k | } | 538 | 23.9k | } |
dsp.c:put_uni_chroma_w_v_8 Line | Count | Source | 517 | 52.5k | { | 518 | 52.5k | const pixel *src = (const pixel *)_src; | 519 | 52.5k | pixel *dst = (pixel *)_dst; | 520 | 52.5k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 521 | 52.5k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 522 | 52.5k | const int8_t *filter = vf; | 523 | 52.5k | const int shift = denom + 14 - BIT_DEPTH; | 524 | 52.5k | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | 525 | 52.5k | #if BIT_DEPTH < 14 | 526 | 52.5k | int offset = 1 << (shift - 1); | 527 | | #else | 528 | | int offset = 0; | 529 | | #endif | 530 | | | 531 | 341k | for (int y = 0; y < height; y++) { | 532 | 2.67M | for (int x = 0; x < width; x++) { | 533 | 2.38M | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | 534 | 2.38M | } | 535 | 288k | dst += dst_stride; | 536 | 288k | src += src_stride; | 537 | 288k | } | 538 | 52.5k | } |
|
539 | | |
540 | | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, |
541 | | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, |
542 | | const int8_t *hf, const int8_t *vf, int width) |
543 | 952k | { |
544 | 952k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; |
545 | 952k | int16_t *tmp = tmp_array; |
546 | 952k | const pixel *src = (const pixel *)_src; |
547 | 952k | pixel *dst = (pixel *)_dst; |
548 | 952k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
549 | 952k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
550 | 952k | const int8_t *filter = hf; |
551 | 952k | const int shift = denom + 14 - BIT_DEPTH; |
552 | 952k | #if BIT_DEPTH < 14 |
553 | 952k | const int offset = 1 << (shift - 1); |
554 | | #else |
555 | | const int offset = 0; |
556 | | #endif |
557 | | |
558 | 952k | src -= CHROMA_EXTRA_BEFORE * src_stride; |
559 | | |
560 | 11.1M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 | 111M | for (int x = 0; x < width; x++) |
562 | 101M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); |
563 | 10.2M | src += src_stride; |
564 | 10.2M | tmp += MAX_PB_SIZE; |
565 | 10.2M | } |
566 | | |
567 | 952k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; |
568 | 952k | filter = vf; |
569 | | |
570 | 952k | ox = ox * (1 << (BIT_DEPTH - 8)); |
571 | 8.29M | for (int y = 0; y < height; y++) { |
572 | 89.2M | for (int x = 0; x < width; x++) |
573 | 81.9M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
574 | 7.34M | tmp += MAX_PB_SIZE; |
575 | 7.34M | dst += dst_stride; |
576 | 7.34M | } |
577 | 952k | } dsp.c:put_uni_chroma_w_hv_9 Line | Count | Source | 543 | 36.0k | { | 544 | 36.0k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 36.0k | int16_t *tmp = tmp_array; | 546 | 36.0k | const pixel *src = (const pixel *)_src; | 547 | 36.0k | pixel *dst = (pixel *)_dst; | 548 | 36.0k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 36.0k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 36.0k | const int8_t *filter = hf; | 551 | 36.0k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 36.0k | #if BIT_DEPTH < 14 | 553 | 36.0k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 36.0k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 795k | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 11.6M | for (int x = 0; x < width; x++) | 562 | 10.8M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 758k | src += src_stride; | 564 | 758k | tmp += MAX_PB_SIZE; | 565 | 758k | } | 566 | | | 567 | 36.0k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 36.0k | filter = vf; | 569 | | | 570 | 36.0k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 686k | for (int y = 0; y < height; y++) { | 572 | 10.3M | for (int x = 0; x < width; x++) | 573 | 9.65M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 650k | tmp += MAX_PB_SIZE; | 575 | 650k | dst += dst_stride; | 576 | 650k | } | 577 | 36.0k | } |
dsp.c:put_uni_chroma_w_hv_10 Line | Count | Source | 543 | 70.8k | { | 544 | 70.8k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 70.8k | int16_t *tmp = tmp_array; | 546 | 70.8k | const pixel *src = (const pixel *)_src; | 547 | 70.8k | pixel *dst = (pixel *)_dst; | 548 | 70.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 70.8k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 70.8k | const int8_t *filter = hf; | 551 | 70.8k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 70.8k | #if BIT_DEPTH < 14 | 553 | 70.8k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 70.8k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 1.00M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 11.5M | for (int x = 0; x < width; x++) | 562 | 10.6M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 937k | src += src_stride; | 564 | 937k | tmp += MAX_PB_SIZE; | 565 | 937k | } | 566 | | | 567 | 70.8k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 70.8k | filter = vf; | 569 | | | 570 | 70.8k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 796k | for (int y = 0; y < height; y++) { | 572 | 9.65M | for (int x = 0; x < width; x++) | 573 | 8.93M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 725k | tmp += MAX_PB_SIZE; | 575 | 725k | dst += dst_stride; | 576 | 725k | } | 577 | 70.8k | } |
dsp.c:put_uni_chroma_w_hv_12 Line | Count | Source | 543 | 190k | { | 544 | 190k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 190k | int16_t *tmp = tmp_array; | 546 | 190k | const pixel *src = (const pixel *)_src; | 547 | 190k | pixel *dst = (pixel *)_dst; | 548 | 190k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 190k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 190k | const int8_t *filter = hf; | 551 | 190k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 190k | #if BIT_DEPTH < 14 | 553 | 190k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 190k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 2.09M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 21.7M | for (int x = 0; x < width; x++) | 562 | 19.8M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 1.90M | src += src_stride; | 564 | 1.90M | tmp += MAX_PB_SIZE; | 565 | 1.90M | } | 566 | | | 567 | 190k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 190k | filter = vf; | 569 | | | 570 | 190k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 1.52M | for (int y = 0; y < height; y++) { | 572 | 17.3M | for (int x = 0; x < width; x++) | 573 | 15.9M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 1.33M | tmp += MAX_PB_SIZE; | 575 | 1.33M | dst += dst_stride; | 576 | 1.33M | } | 577 | 190k | } |
dsp.c:put_uni_chroma_w_hv_8 Line | Count | Source | 543 | 196k | { | 544 | 196k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 196k | int16_t *tmp = tmp_array; | 546 | 196k | const pixel *src = (const pixel *)_src; | 547 | 196k | pixel *dst = (pixel *)_dst; | 548 | 196k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 196k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 196k | const int8_t *filter = hf; | 551 | 196k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 196k | #if BIT_DEPTH < 14 | 553 | 196k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 196k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 2.07M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 16.8M | for (int x = 0; x < width; x++) | 562 | 14.9M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 1.87M | src += src_stride; | 564 | 1.87M | tmp += MAX_PB_SIZE; | 565 | 1.87M | } | 566 | | | 567 | 196k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 196k | filter = vf; | 569 | | | 570 | 196k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 1.48M | for (int y = 0; y < height; y++) { | 572 | 12.4M | for (int x = 0; x < width; x++) | 573 | 11.2M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 1.28M | tmp += MAX_PB_SIZE; | 575 | 1.28M | dst += dst_stride; | 576 | 1.28M | } | 577 | 196k | } |
dsp.c:put_uni_chroma_w_hv_12 Line | Count | Source | 543 | 190k | { | 544 | 190k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 190k | int16_t *tmp = tmp_array; | 546 | 190k | const pixel *src = (const pixel *)_src; | 547 | 190k | pixel *dst = (pixel *)_dst; | 548 | 190k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 190k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 190k | const int8_t *filter = hf; | 551 | 190k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 190k | #if BIT_DEPTH < 14 | 553 | 190k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 190k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 2.09M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 21.7M | for (int x = 0; x < width; x++) | 562 | 19.8M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 1.90M | src += src_stride; | 564 | 1.90M | tmp += MAX_PB_SIZE; | 565 | 1.90M | } | 566 | | | 567 | 190k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 190k | filter = vf; | 569 | | | 570 | 190k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 1.52M | for (int y = 0; y < height; y++) { | 572 | 17.3M | for (int x = 0; x < width; x++) | 573 | 15.9M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 1.33M | tmp += MAX_PB_SIZE; | 575 | 1.33M | dst += dst_stride; | 576 | 1.33M | } | 577 | 190k | } |
dsp.c:put_uni_chroma_w_hv_10 Line | Count | Source | 543 | 70.8k | { | 544 | 70.8k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 70.8k | int16_t *tmp = tmp_array; | 546 | 70.8k | const pixel *src = (const pixel *)_src; | 547 | 70.8k | pixel *dst = (pixel *)_dst; | 548 | 70.8k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 70.8k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 70.8k | const int8_t *filter = hf; | 551 | 70.8k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 70.8k | #if BIT_DEPTH < 14 | 553 | 70.8k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 70.8k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 1.00M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 11.5M | for (int x = 0; x < width; x++) | 562 | 10.6M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 937k | src += src_stride; | 564 | 937k | tmp += MAX_PB_SIZE; | 565 | 937k | } | 566 | | | 567 | 70.8k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 70.8k | filter = vf; | 569 | | | 570 | 70.8k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 796k | for (int y = 0; y < height; y++) { | 572 | 9.65M | for (int x = 0; x < width; x++) | 573 | 8.93M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 725k | tmp += MAX_PB_SIZE; | 575 | 725k | dst += dst_stride; | 576 | 725k | } | 577 | 70.8k | } |
dsp.c:put_uni_chroma_w_hv_8 Line | Count | Source | 543 | 196k | { | 544 | 196k | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | 545 | 196k | int16_t *tmp = tmp_array; | 546 | 196k | const pixel *src = (const pixel *)_src; | 547 | 196k | pixel *dst = (pixel *)_dst; | 548 | 196k | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | 549 | 196k | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | 550 | 196k | const int8_t *filter = hf; | 551 | 196k | const int shift = denom + 14 - BIT_DEPTH; | 552 | 196k | #if BIT_DEPTH < 14 | 553 | 196k | const int offset = 1 << (shift - 1); | 554 | | #else | 555 | | const int offset = 0; | 556 | | #endif | 557 | | | 558 | 196k | src -= CHROMA_EXTRA_BEFORE * src_stride; | 559 | | | 560 | 2.07M | for (int y = 0; y < height + CHROMA_EXTRA; y++) { | 561 | 16.8M | for (int x = 0; x < width; x++) | 562 | 14.9M | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | 563 | 1.87M | src += src_stride; | 564 | 1.87M | tmp += MAX_PB_SIZE; | 565 | 1.87M | } | 566 | | | 567 | 196k | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | 568 | 196k | filter = vf; | 569 | | | 570 | 196k | ox = ox * (1 << (BIT_DEPTH - 8)); | 571 | 1.48M | for (int y = 0; y < height; y++) { | 572 | 12.4M | for (int x = 0; x < width; x++) | 573 | 11.2M | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | 574 | 1.28M | tmp += MAX_PB_SIZE; | 575 | 1.28M | dst += dst_stride; | 576 | 1.28M | } | 577 | 196k | } |
|