/src/ffmpeg/libavcodec/h264dsp_template.c
Line | Count | Source |
1 | | /* |
2 | | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
3 | | * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * H.264 / AVC / MPEG-4 part10 DSP functions. |
25 | | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | | */ |
27 | | |
28 | | #include "bit_depth_template.c" |
29 | | |
30 | 11.5G | #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) |
31 | 792M | #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) |
32 | | #define H264_WEIGHT(W) \ |
33 | | static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, ptrdiff_t stride, int height, \ |
34 | 82.9M | int log2_denom, int weight, int offset) \ |
35 | 82.9M | { \ |
36 | 82.9M | int y; \ |
37 | 82.9M | pixel *block = (pixel*)_block; \ |
38 | 82.9M | stride >>= sizeof(pixel)-1; \ |
39 | 82.9M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ |
40 | 82.9M | if(log2_denom) offset += 1<<(log2_denom-1); \ |
41 | 1.04G | for (y = 0; y < height; y++, block += stride) { \ |
42 | 965M | op_scale1(0); \ |
43 | 965M | op_scale1(1); \ |
44 | 965M | if(W==2) continue; \ |
45 | 965M | op_scale1(2); \ |
46 | 955M | op_scale1(3); \ |
47 | 955M | if(W==4) continue; \ |
48 | 955M | op_scale1(4); \ |
49 | 899M | op_scale1(5); \ |
50 | 899M | op_scale1(6); \ |
51 | 899M | op_scale1(7); \ |
52 | 899M | if(W==8) continue; \ |
53 | 899M | op_scale1(8); \ |
54 | 450M | op_scale1(9); \ |
55 | 450M | op_scale1(10); \ |
56 | 450M | op_scale1(11); \ |
57 | 450M | op_scale1(12); \ |
58 | 450M | op_scale1(13); \ |
59 | 450M | op_scale1(14); \ |
60 | 450M | op_scale1(15); \ |
61 | 450M | } \ |
62 | 82.9M | } \ h264dsp.c:weight_h264_pixels16_9_c Line | Count | Source | 34 | 5.27M | int log2_denom, int weight, int offset) \ | 35 | 5.27M | { \ | 36 | 5.27M | int y; \ | 37 | 5.27M | pixel *block = (pixel*)_block; \ | 38 | 5.27M | stride >>= sizeof(pixel)-1; \ | 39 | 5.27M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 5.27M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 88.7M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 83.4M | op_scale1(0); \ | 43 | 83.4M | op_scale1(1); \ | 44 | 83.4M | if(W==2) continue; \ | 45 | 83.4M | op_scale1(2); \ | 46 | 83.4M | op_scale1(3); \ | 47 | 83.4M | if(W==4) continue; \ | 48 | 83.4M | op_scale1(4); \ | 49 | 83.4M | op_scale1(5); \ | 50 | 83.4M | op_scale1(6); \ | 51 | 83.4M | op_scale1(7); \ | 52 | 83.4M | if(W==8) continue; \ | 53 | 83.4M | op_scale1(8); \ | 54 | 83.4M | op_scale1(9); \ | 55 | 83.4M | op_scale1(10); \ | 56 | 83.4M | op_scale1(11); \ | 57 | 83.4M | op_scale1(12); \ | 58 | 83.4M | op_scale1(13); \ | 59 | 83.4M | op_scale1(14); \ | 60 | 83.4M | op_scale1(15); \ | 61 | 83.4M | } \ | 62 | 5.27M | } \ |
h264dsp.c:weight_h264_pixels8_9_c Line | Count | Source | 34 | 8.97M | int log2_denom, int weight, int offset) \ | 35 | 8.97M | { \ | 36 | 8.97M | int y; \ | 37 | 8.97M | pixel *block = (pixel*)_block; \ | 38 | 8.97M | stride >>= sizeof(pixel)-1; \ | 39 | 8.97M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 8.97M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 115M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 106M | op_scale1(0); \ | 43 | 106M | op_scale1(1); \ | 44 | 106M | if(W==2) continue; \ | 45 | 106M | op_scale1(2); \ | 46 | 106M | op_scale1(3); \ | 47 | 106M | if(W==4) continue; \ | 48 | 106M | op_scale1(4); \ | 49 | 106M | op_scale1(5); \ | 50 | 106M | op_scale1(6); \ | 51 | 106M | op_scale1(7); \ | 52 | 106M | if(W==8) continue; \ | 53 | 106M | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 8.97M | } \ |
h264dsp.c:weight_h264_pixels4_9_c Line | Count | Source | 34 | 1.25M | int log2_denom, int weight, int offset) \ | 35 | 1.25M | { \ | 36 | 1.25M | int y; \ | 37 | 1.25M | pixel *block = (pixel*)_block; \ | 38 | 1.25M | stride >>= sizeof(pixel)-1; \ | 39 | 1.25M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 1.25M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 8.91M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 7.66M | op_scale1(0); \ | 43 | 7.66M | op_scale1(1); \ | 44 | 7.66M | if(W==2) continue; \ | 45 | 7.66M | op_scale1(2); \ | 46 | 7.66M | op_scale1(3); \ | 47 | 7.66M | if(W==4) continue; \ | 48 | 7.66M | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 1.25M | } \ |
h264dsp.c:weight_h264_pixels2_9_c Line | Count | Source | 34 | 507k | int log2_denom, int weight, int offset) \ | 35 | 507k | { \ | 36 | 507k | int y; \ | 37 | 507k | pixel *block = (pixel*)_block; \ | 38 | 507k | stride >>= sizeof(pixel)-1; \ | 39 | 507k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 507k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 2.99M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 2.48M | op_scale1(0); \ | 43 | 2.48M | op_scale1(1); \ | 44 | 2.48M | if(W==2) continue; \ | 45 | 2.48M | op_scale1(2); \ | 46 | 0 | op_scale1(3); \ | 47 | 0 | if(W==4) continue; \ | 48 | 0 | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 507k | } \ |
h264dsp.c:weight_h264_pixels16_10_c Line | Count | Source | 34 | 12.1M | int log2_denom, int weight, int offset) \ | 35 | 12.1M | { \ | 36 | 12.1M | int y; \ | 37 | 12.1M | pixel *block = (pixel*)_block; \ | 38 | 12.1M | stride >>= sizeof(pixel)-1; \ | 39 | 12.1M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 12.1M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 204M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 192M | op_scale1(0); \ | 43 | 192M | op_scale1(1); \ | 44 | 192M | if(W==2) continue; \ | 45 | 192M | op_scale1(2); \ | 46 | 192M | op_scale1(3); \ | 47 | 192M | if(W==4) continue; \ | 48 | 192M | op_scale1(4); \ | 49 | 192M | op_scale1(5); \ | 50 | 192M | op_scale1(6); \ | 51 | 192M | op_scale1(7); \ | 52 | 192M | if(W==8) continue; \ | 53 | 192M | op_scale1(8); \ | 54 | 192M | op_scale1(9); \ | 55 | 192M | op_scale1(10); \ | 56 | 192M | op_scale1(11); \ | 57 | 192M | op_scale1(12); \ | 58 | 192M | op_scale1(13); \ | 59 | 192M | op_scale1(14); \ | 60 | 192M | op_scale1(15); \ | 61 | 192M | } \ | 62 | 12.1M | } \ |
h264dsp.c:weight_h264_pixels8_10_c Line | Count | Source | 34 | 15.0M | int log2_denom, int weight, int offset) \ | 35 | 15.0M | { \ | 36 | 15.0M | int y; \ | 37 | 15.0M | pixel *block = (pixel*)_block; \ | 38 | 15.0M | stride >>= sizeof(pixel)-1; \ | 39 | 15.0M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 15.0M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 181M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 166M | op_scale1(0); \ | 43 | 166M | op_scale1(1); \ | 44 | 166M | if(W==2) continue; \ | 45 | 166M | op_scale1(2); \ | 46 | 166M | op_scale1(3); \ | 47 | 166M | if(W==4) continue; \ | 48 | 166M | op_scale1(4); \ | 49 | 166M | op_scale1(5); \ | 50 | 166M | op_scale1(6); \ | 51 | 166M | op_scale1(7); \ | 52 | 166M | if(W==8) continue; \ | 53 | 166M | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 15.0M | } \ |
h264dsp.c:weight_h264_pixels4_10_c Line | Count | Source | 34 | 2.18M | int log2_denom, int weight, int offset) \ | 35 | 2.18M | { \ | 36 | 2.18M | int y; \ | 37 | 2.18M | pixel *block = (pixel*)_block; \ | 38 | 2.18M | stride >>= sizeof(pixel)-1; \ | 39 | 2.18M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 2.18M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 9.97M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 7.78M | op_scale1(0); \ | 43 | 7.78M | op_scale1(1); \ | 44 | 7.78M | if(W==2) continue; \ | 45 | 7.78M | op_scale1(2); \ | 46 | 7.78M | op_scale1(3); \ | 47 | 7.78M | if(W==4) continue; \ | 48 | 7.78M | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 2.18M | } \ |
h264dsp.c:weight_h264_pixels2_10_c Line | Count | Source | 34 | 510k | int log2_denom, int weight, int offset) \ | 35 | 510k | { \ | 36 | 510k | int y; \ | 37 | 510k | pixel *block = (pixel*)_block; \ | 38 | 510k | stride >>= sizeof(pixel)-1; \ | 39 | 510k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 510k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 2.04M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 1.53M | op_scale1(0); \ | 43 | 1.53M | op_scale1(1); \ | 44 | 1.53M | if(W==2) continue; \ | 45 | 1.53M | op_scale1(2); \ | 46 | 0 | op_scale1(3); \ | 47 | 0 | if(W==4) continue; \ | 48 | 0 | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 510k | } \ |
h264dsp.c:weight_h264_pixels16_12_c Line | Count | Source | 34 | 730k | int log2_denom, int weight, int offset) \ | 35 | 730k | { \ | 36 | 730k | int y; \ | 37 | 730k | pixel *block = (pixel*)_block; \ | 38 | 730k | stride >>= sizeof(pixel)-1; \ | 39 | 730k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 730k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 11.6M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 10.8M | op_scale1(0); \ | 43 | 10.8M | op_scale1(1); \ | 44 | 10.8M | if(W==2) continue; \ | 45 | 10.8M | op_scale1(2); \ | 46 | 10.8M | op_scale1(3); \ | 47 | 10.8M | if(W==4) continue; \ | 48 | 10.8M | op_scale1(4); \ | 49 | 10.8M | op_scale1(5); \ | 50 | 10.8M | op_scale1(6); \ | 51 | 10.8M | op_scale1(7); \ | 52 | 10.8M | if(W==8) continue; \ | 53 | 10.8M | op_scale1(8); \ | 54 | 10.8M | op_scale1(9); \ | 55 | 10.8M | op_scale1(10); \ | 56 | 10.8M | op_scale1(11); \ | 57 | 10.8M | op_scale1(12); \ | 58 | 10.8M | op_scale1(13); \ | 59 | 10.8M | op_scale1(14); \ | 60 | 10.8M | op_scale1(15); \ | 61 | 10.8M | } \ | 62 | 730k | } \ |
h264dsp.c:weight_h264_pixels8_12_c Line | Count | Source | 34 | 1.48M | int log2_denom, int weight, int offset) \ | 35 | 1.48M | { \ | 36 | 1.48M | int y; \ | 37 | 1.48M | pixel *block = (pixel*)_block; \ | 38 | 1.48M | stride >>= sizeof(pixel)-1; \ | 39 | 1.48M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 1.48M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 15.5M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 14.0M | op_scale1(0); \ | 43 | 14.0M | op_scale1(1); \ | 44 | 14.0M | if(W==2) continue; \ | 45 | 14.0M | op_scale1(2); \ | 46 | 14.0M | op_scale1(3); \ | 47 | 14.0M | if(W==4) continue; \ | 48 | 14.0M | op_scale1(4); \ | 49 | 14.0M | op_scale1(5); \ | 50 | 14.0M | op_scale1(6); \ | 51 | 14.0M | op_scale1(7); \ | 52 | 14.0M | if(W==8) continue; \ | 53 | 14.0M | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 1.48M | } \ |
h264dsp.c:weight_h264_pixels4_12_c Line | Count | Source | 34 | 1.20M | int log2_denom, int weight, int offset) \ | 35 | 1.20M | { \ | 36 | 1.20M | int y; \ | 37 | 1.20M | pixel *block = (pixel*)_block; \ | 38 | 1.20M | stride >>= sizeof(pixel)-1; \ | 39 | 1.20M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 1.20M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 7.42M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 6.21M | op_scale1(0); \ | 43 | 6.21M | op_scale1(1); \ | 44 | 6.21M | if(W==2) continue; \ | 45 | 6.21M | op_scale1(2); \ | 46 | 6.21M | op_scale1(3); \ | 47 | 6.21M | if(W==4) continue; \ | 48 | 6.21M | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 1.20M | } \ |
h264dsp.c:weight_h264_pixels2_12_c Line | Count | Source | 34 | 513k | int log2_denom, int weight, int offset) \ | 35 | 513k | { \ | 36 | 513k | int y; \ | 37 | 513k | pixel *block = (pixel*)_block; \ | 38 | 513k | stride >>= sizeof(pixel)-1; \ | 39 | 513k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 513k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 2.43M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 1.92M | op_scale1(0); \ | 43 | 1.92M | op_scale1(1); \ | 44 | 1.92M | if(W==2) continue; \ | 45 | 1.92M | op_scale1(2); \ | 46 | 0 | op_scale1(3); \ | 47 | 0 | if(W==4) continue; \ | 48 | 0 | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 513k | } \ |
h264dsp.c:weight_h264_pixels16_14_c Line | Count | Source | 34 | 385k | int log2_denom, int weight, int offset) \ | 35 | 385k | { \ | 36 | 385k | int y; \ | 37 | 385k | pixel *block = (pixel*)_block; \ | 38 | 385k | stride >>= sizeof(pixel)-1; \ | 39 | 385k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 385k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 6.26M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 5.87M | op_scale1(0); \ | 43 | 5.87M | op_scale1(1); \ | 44 | 5.87M | if(W==2) continue; \ | 45 | 5.87M | op_scale1(2); \ | 46 | 5.87M | op_scale1(3); \ | 47 | 5.87M | if(W==4) continue; \ | 48 | 5.87M | op_scale1(4); \ | 49 | 5.87M | op_scale1(5); \ | 50 | 5.87M | op_scale1(6); \ | 51 | 5.87M | op_scale1(7); \ | 52 | 5.87M | if(W==8) continue; \ | 53 | 5.87M | op_scale1(8); \ | 54 | 5.87M | op_scale1(9); \ | 55 | 5.87M | op_scale1(10); \ | 56 | 5.87M | op_scale1(11); \ | 57 | 5.87M | op_scale1(12); \ | 58 | 5.87M | op_scale1(13); \ | 59 | 5.87M | op_scale1(14); \ | 60 | 5.87M | op_scale1(15); \ | 61 | 5.87M | } \ | 62 | 385k | } \ |
h264dsp.c:weight_h264_pixels8_14_c Line | Count | Source | 34 | 663k | int log2_denom, int weight, int offset) \ | 35 | 663k | { \ | 36 | 663k | int y; \ | 37 | 663k | pixel *block = (pixel*)_block; \ | 38 | 663k | stride >>= sizeof(pixel)-1; \ | 39 | 663k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 663k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 8.97M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 8.31M | op_scale1(0); \ | 43 | 8.31M | op_scale1(1); \ | 44 | 8.31M | if(W==2) continue; \ | 45 | 8.31M | op_scale1(2); \ | 46 | 8.31M | op_scale1(3); \ | 47 | 8.31M | if(W==4) continue; \ | 48 | 8.31M | op_scale1(4); \ | 49 | 8.31M | op_scale1(5); \ | 50 | 8.31M | op_scale1(6); \ | 51 | 8.31M | op_scale1(7); \ | 52 | 8.31M | if(W==8) continue; \ | 53 | 8.31M | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 663k | } \ |
h264dsp.c:weight_h264_pixels4_14_c Line | Count | Source | 34 | 463k | int log2_denom, int weight, int offset) \ | 35 | 463k | { \ | 36 | 463k | int y; \ | 37 | 463k | pixel *block = (pixel*)_block; \ | 38 | 463k | stride >>= sizeof(pixel)-1; \ | 39 | 463k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 463k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 4.06M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 3.59M | op_scale1(0); \ | 43 | 3.59M | op_scale1(1); \ | 44 | 3.59M | if(W==2) continue; \ | 45 | 3.59M | op_scale1(2); \ | 46 | 3.59M | op_scale1(3); \ | 47 | 3.59M | if(W==4) continue; \ | 48 | 3.59M | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 463k | } \ |
h264dsp.c:weight_h264_pixels2_14_c Line | Count | Source | 34 | 142k | int log2_denom, int weight, int offset) \ | 35 | 142k | { \ | 36 | 142k | int y; \ | 37 | 142k | pixel *block = (pixel*)_block; \ | 38 | 142k | stride >>= sizeof(pixel)-1; \ | 39 | 142k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 142k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 956k | for (y = 0; y < height; y++, block += stride) { \ | 42 | 814k | op_scale1(0); \ | 43 | 814k | op_scale1(1); \ | 44 | 814k | if(W==2) continue; \ | 45 | 814k | op_scale1(2); \ | 46 | 0 | op_scale1(3); \ | 47 | 0 | if(W==4) continue; \ | 48 | 0 | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 142k | } \ |
h264dsp.c:weight_h264_pixels16_8_c Line | Count | Source | 34 | 9.97M | int log2_denom, int weight, int offset) \ | 35 | 9.97M | { \ | 36 | 9.97M | int y; \ | 37 | 9.97M | pixel *block = (pixel*)_block; \ | 38 | 9.97M | stride >>= sizeof(pixel)-1; \ | 39 | 9.97M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 9.97M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 168M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 158M | op_scale1(0); \ | 43 | 158M | op_scale1(1); \ | 44 | 158M | if(W==2) continue; \ | 45 | 158M | op_scale1(2); \ | 46 | 158M | op_scale1(3); \ | 47 | 158M | if(W==4) continue; \ | 48 | 158M | op_scale1(4); \ | 49 | 158M | op_scale1(5); \ | 50 | 158M | op_scale1(6); \ | 51 | 158M | op_scale1(7); \ | 52 | 158M | if(W==8) continue; \ | 53 | 158M | op_scale1(8); \ | 54 | 158M | op_scale1(9); \ | 55 | 158M | op_scale1(10); \ | 56 | 158M | op_scale1(11); \ | 57 | 158M | op_scale1(12); \ | 58 | 158M | op_scale1(13); \ | 59 | 158M | op_scale1(14); \ | 60 | 158M | op_scale1(15); \ | 61 | 158M | } \ | 62 | 9.97M | } \ |
h264dsp.c:weight_h264_pixels8_8_c Line | Count | Source | 34 | 16.4M | int log2_denom, int weight, int offset) \ | 35 | 16.4M | { \ | 36 | 16.4M | int y; \ | 37 | 16.4M | pixel *block = (pixel*)_block; \ | 38 | 16.4M | stride >>= sizeof(pixel)-1; \ | 39 | 16.4M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 16.4M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 169M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 153M | op_scale1(0); \ | 43 | 153M | op_scale1(1); \ | 44 | 153M | if(W==2) continue; \ | 45 | 153M | op_scale1(2); \ | 46 | 153M | op_scale1(3); \ | 47 | 153M | if(W==4) continue; \ | 48 | 153M | op_scale1(4); \ | 49 | 153M | op_scale1(5); \ | 50 | 153M | op_scale1(6); \ | 51 | 153M | op_scale1(7); \ | 52 | 153M | if(W==8) continue; \ | 53 | 153M | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 16.4M | } \ |
h264dsp.c:weight_h264_pixels4_8_c Line | Count | Source | 34 | 4.32M | int log2_denom, int weight, int offset) \ | 35 | 4.32M | { \ | 36 | 4.32M | int y; \ | 37 | 4.32M | pixel *block = (pixel*)_block; \ | 38 | 4.32M | stride >>= sizeof(pixel)-1; \ | 39 | 4.32M | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 4.32M | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 35.1M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 30.7M | op_scale1(0); \ | 43 | 30.7M | op_scale1(1); \ | 44 | 30.7M | if(W==2) continue; \ | 45 | 30.7M | op_scale1(2); \ | 46 | 30.7M | op_scale1(3); \ | 47 | 30.7M | if(W==4) continue; \ | 48 | 30.7M | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 4.32M | } \ |
h264dsp.c:weight_h264_pixels2_8_c Line | Count | Source | 34 | 723k | int log2_denom, int weight, int offset) \ | 35 | 723k | { \ | 36 | 723k | int y; \ | 37 | 723k | pixel *block = (pixel*)_block; \ | 38 | 723k | stride >>= sizeof(pixel)-1; \ | 39 | 723k | offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \ | 40 | 723k | if(log2_denom) offset += 1<<(log2_denom-1); \ | 41 | 3.81M | for (y = 0; y < height; y++, block += stride) { \ | 42 | 3.09M | op_scale1(0); \ | 43 | 3.09M | op_scale1(1); \ | 44 | 3.09M | if(W==2) continue; \ | 45 | 3.09M | op_scale1(2); \ | 46 | 0 | op_scale1(3); \ | 47 | 0 | if(W==4) continue; \ | 48 | 0 | op_scale1(4); \ | 49 | 0 | op_scale1(5); \ | 50 | 0 | op_scale1(6); \ | 51 | 0 | op_scale1(7); \ | 52 | 0 | if(W==8) continue; \ | 53 | 0 | op_scale1(8); \ | 54 | 0 | op_scale1(9); \ | 55 | 0 | op_scale1(10); \ | 56 | 0 | op_scale1(11); \ | 57 | 0 | op_scale1(12); \ | 58 | 0 | op_scale1(13); \ | 59 | 0 | op_scale1(14); \ | 60 | 0 | op_scale1(15); \ | 61 | 0 | } \ | 62 | 723k | } \ |
|
63 | | static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, int height, \ |
64 | 13.0M | int log2_denom, int weightd, int weights, int offset) \ |
65 | 13.0M | { \ |
66 | 13.0M | int y; \ |
67 | 13.0M | pixel *dst = (pixel*)_dst; \ |
68 | 13.0M | pixel *src = (pixel*)_src; \ |
69 | 13.0M | stride >>= sizeof(pixel)-1; \ |
70 | 13.0M | offset = (unsigned)offset << (BIT_DEPTH-8); \ |
71 | 13.0M | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ |
72 | 106M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ |
73 | 93.1M | op_scale2(0); \ |
74 | 93.1M | op_scale2(1); \ |
75 | 93.1M | if(W==2) continue; \ |
76 | 93.1M | op_scale2(2); \ |
77 | 89.3M | op_scale2(3); \ |
78 | 89.3M | if(W==4) continue; \ |
79 | 89.3M | op_scale2(4); \ |
80 | 59.3M | op_scale2(5); \ |
81 | 59.3M | op_scale2(6); \ |
82 | 59.3M | op_scale2(7); \ |
83 | 59.3M | if(W==8) continue; \ |
84 | 59.3M | op_scale2(8); \ |
85 | 13.8M | op_scale2(9); \ |
86 | 13.8M | op_scale2(10); \ |
87 | 13.8M | op_scale2(11); \ |
88 | 13.8M | op_scale2(12); \ |
89 | 13.8M | op_scale2(13); \ |
90 | 13.8M | op_scale2(14); \ |
91 | 13.8M | op_scale2(15); \ |
92 | 13.8M | } \ |
93 | 13.0M | } h264dsp.c:biweight_h264_pixels16_9_c Line | Count | Source | 64 | 150k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 150k | { \ | 66 | 150k | int y; \ | 67 | 150k | pixel *dst = (pixel*)_dst; \ | 68 | 150k | pixel *src = (pixel*)_src; \ | 69 | 150k | stride >>= sizeof(pixel)-1; \ | 70 | 150k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 150k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 2.42M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 2.27M | op_scale2(0); \ | 74 | 2.27M | op_scale2(1); \ | 75 | 2.27M | if(W==2) continue; \ | 76 | 2.27M | op_scale2(2); \ | 77 | 2.27M | op_scale2(3); \ | 78 | 2.27M | if(W==4) continue; \ | 79 | 2.27M | op_scale2(4); \ | 80 | 2.27M | op_scale2(5); \ | 81 | 2.27M | op_scale2(6); \ | 82 | 2.27M | op_scale2(7); \ | 83 | 2.27M | if(W==8) continue; \ | 84 | 2.27M | op_scale2(8); \ | 85 | 2.27M | op_scale2(9); \ | 86 | 2.27M | op_scale2(10); \ | 87 | 2.27M | op_scale2(11); \ | 88 | 2.27M | op_scale2(12); \ | 89 | 2.27M | op_scale2(13); \ | 90 | 2.27M | op_scale2(14); \ | 91 | 2.27M | op_scale2(15); \ | 92 | 2.27M | } \ | 93 | 150k | } |
h264dsp.c:biweight_h264_pixels8_9_c Line | Count | Source | 64 | 537k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 537k | { \ | 66 | 537k | int y; \ | 67 | 537k | pixel *dst = (pixel*)_dst; \ | 68 | 537k | pixel *src = (pixel*)_src; \ | 69 | 537k | stride >>= sizeof(pixel)-1; \ | 70 | 537k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 537k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 5.71M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 5.18M | op_scale2(0); \ | 74 | 5.18M | op_scale2(1); \ | 75 | 5.18M | if(W==2) continue; \ | 76 | 5.18M | op_scale2(2); \ | 77 | 5.18M | op_scale2(3); \ | 78 | 5.18M | if(W==4) continue; \ | 79 | 5.18M | op_scale2(4); \ | 80 | 5.18M | op_scale2(5); \ | 81 | 5.18M | op_scale2(6); \ | 82 | 5.18M | op_scale2(7); \ | 83 | 5.18M | if(W==8) continue; \ | 84 | 5.18M | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 537k | } |
h264dsp.c:biweight_h264_pixels4_9_c Line | Count | Source | 64 | 753k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 753k | { \ | 66 | 753k | int y; \ | 67 | 753k | pixel *dst = (pixel*)_dst; \ | 68 | 753k | pixel *src = (pixel*)_src; \ | 69 | 753k | stride >>= sizeof(pixel)-1; \ | 70 | 753k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 753k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 4.35M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 3.60M | op_scale2(0); \ | 74 | 3.60M | op_scale2(1); \ | 75 | 3.60M | if(W==2) continue; \ | 76 | 3.60M | op_scale2(2); \ | 77 | 3.60M | op_scale2(3); \ | 78 | 3.60M | if(W==4) continue; \ | 79 | 3.60M | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 753k | } |
h264dsp.c:biweight_h264_pixels2_9_c Line | Count | Source | 64 | 161k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 161k | { \ | 66 | 161k | int y; \ | 67 | 161k | pixel *dst = (pixel*)_dst; \ | 68 | 161k | pixel *src = (pixel*)_src; \ | 69 | 161k | stride >>= sizeof(pixel)-1; \ | 70 | 161k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 161k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 791k | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 629k | op_scale2(0); \ | 74 | 629k | op_scale2(1); \ | 75 | 629k | if(W==2) continue; \ | 76 | 629k | op_scale2(2); \ | 77 | 0 | op_scale2(3); \ | 78 | 0 | if(W==4) continue; \ | 79 | 0 | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 161k | } |
h264dsp.c:biweight_h264_pixels16_10_c Line | Count | Source | 64 | 394k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 394k | { \ | 66 | 394k | int y; \ | 67 | 394k | pixel *dst = (pixel*)_dst; \ | 68 | 394k | pixel *src = (pixel*)_src; \ | 69 | 394k | stride >>= sizeof(pixel)-1; \ | 70 | 394k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 394k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 6.54M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 6.15M | op_scale2(0); \ | 74 | 6.15M | op_scale2(1); \ | 75 | 6.15M | if(W==2) continue; \ | 76 | 6.15M | op_scale2(2); \ | 77 | 6.15M | op_scale2(3); \ | 78 | 6.15M | if(W==4) continue; \ | 79 | 6.15M | op_scale2(4); \ | 80 | 6.15M | op_scale2(5); \ | 81 | 6.15M | op_scale2(6); \ | 82 | 6.15M | op_scale2(7); \ | 83 | 6.15M | if(W==8) continue; \ | 84 | 6.15M | op_scale2(8); \ | 85 | 6.15M | op_scale2(9); \ | 86 | 6.15M | op_scale2(10); \ | 87 | 6.15M | op_scale2(11); \ | 88 | 6.15M | op_scale2(12); \ | 89 | 6.15M | op_scale2(13); \ | 90 | 6.15M | op_scale2(14); \ | 91 | 6.15M | op_scale2(15); \ | 92 | 6.15M | } \ | 93 | 394k | } |
h264dsp.c:biweight_h264_pixels8_10_c Line | Count | Source | 64 | 2.33M | int log2_denom, int weightd, int weights, int offset) \ | 65 | 2.33M | { \ | 66 | 2.33M | int y; \ | 67 | 2.33M | pixel *dst = (pixel*)_dst; \ | 68 | 2.33M | pixel *src = (pixel*)_src; \ | 69 | 2.33M | stride >>= sizeof(pixel)-1; \ | 70 | 2.33M | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 2.33M | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 25.5M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 23.2M | op_scale2(0); \ | 74 | 23.2M | op_scale2(1); \ | 75 | 23.2M | if(W==2) continue; \ | 76 | 23.2M | op_scale2(2); \ | 77 | 23.2M | op_scale2(3); \ | 78 | 23.2M | if(W==4) continue; \ | 79 | 23.2M | op_scale2(4); \ | 80 | 23.2M | op_scale2(5); \ | 81 | 23.2M | op_scale2(6); \ | 82 | 23.2M | op_scale2(7); \ | 83 | 23.2M | if(W==8) continue; \ | 84 | 23.2M | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 2.33M | } |
h264dsp.c:biweight_h264_pixels4_10_c Line | Count | Source | 64 | 3.03M | int log2_denom, int weightd, int weights, int offset) \ | 65 | 3.03M | { \ | 66 | 3.03M | int y; \ | 67 | 3.03M | pixel *dst = (pixel*)_dst; \ | 68 | 3.03M | pixel *src = (pixel*)_src; \ | 69 | 3.03M | stride >>= sizeof(pixel)-1; \ | 70 | 3.03M | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 3.03M | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 15.6M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 12.5M | op_scale2(0); \ | 74 | 12.5M | op_scale2(1); \ | 75 | 12.5M | if(W==2) continue; \ | 76 | 12.5M | op_scale2(2); \ | 77 | 12.5M | op_scale2(3); \ | 78 | 12.5M | if(W==4) continue; \ | 79 | 12.5M | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 3.03M | } |
h264dsp.c:biweight_h264_pixels2_10_c Line | Count | Source | 64 | 86.5k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 86.5k | { \ | 66 | 86.5k | int y; \ | 67 | 86.5k | pixel *dst = (pixel*)_dst; \ | 68 | 86.5k | pixel *src = (pixel*)_src; \ | 69 | 86.5k | stride >>= sizeof(pixel)-1; \ | 70 | 86.5k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 86.5k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 269k | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 182k | op_scale2(0); \ | 74 | 182k | op_scale2(1); \ | 75 | 182k | if(W==2) continue; \ | 76 | 182k | op_scale2(2); \ | 77 | 0 | op_scale2(3); \ | 78 | 0 | if(W==4) continue; \ | 79 | 0 | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 86.5k | } |
h264dsp.c:biweight_h264_pixels16_12_c Line | Count | Source | 64 | 81.7k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 81.7k | { \ | 66 | 81.7k | int y; \ | 67 | 81.7k | pixel *dst = (pixel*)_dst; \ | 68 | 81.7k | pixel *src = (pixel*)_src; \ | 69 | 81.7k | stride >>= sizeof(pixel)-1; \ | 70 | 81.7k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 81.7k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 1.31M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 1.23M | op_scale2(0); \ | 74 | 1.23M | op_scale2(1); \ | 75 | 1.23M | if(W==2) continue; \ | 76 | 1.23M | op_scale2(2); \ | 77 | 1.23M | op_scale2(3); \ | 78 | 1.23M | if(W==4) continue; \ | 79 | 1.23M | op_scale2(4); \ | 80 | 1.23M | op_scale2(5); \ | 81 | 1.23M | op_scale2(6); \ | 82 | 1.23M | op_scale2(7); \ | 83 | 1.23M | if(W==8) continue; \ | 84 | 1.23M | op_scale2(8); \ | 85 | 1.23M | op_scale2(9); \ | 86 | 1.23M | op_scale2(10); \ | 87 | 1.23M | op_scale2(11); \ | 88 | 1.23M | op_scale2(12); \ | 89 | 1.23M | op_scale2(13); \ | 90 | 1.23M | op_scale2(14); \ | 91 | 1.23M | op_scale2(15); \ | 92 | 1.23M | } \ | 93 | 81.7k | } |
h264dsp.c:biweight_h264_pixels8_12_c Line | Count | Source | 64 | 332k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 332k | { \ | 66 | 332k | int y; \ | 67 | 332k | pixel *dst = (pixel*)_dst; \ | 68 | 332k | pixel *src = (pixel*)_src; \ | 69 | 332k | stride >>= sizeof(pixel)-1; \ | 70 | 332k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 332k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 3.46M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 3.13M | op_scale2(0); \ | 74 | 3.13M | op_scale2(1); \ | 75 | 3.13M | if(W==2) continue; \ | 76 | 3.13M | op_scale2(2); \ | 77 | 3.13M | op_scale2(3); \ | 78 | 3.13M | if(W==4) continue; \ | 79 | 3.13M | op_scale2(4); \ | 80 | 3.13M | op_scale2(5); \ | 81 | 3.13M | op_scale2(6); \ | 82 | 3.13M | op_scale2(7); \ | 83 | 3.13M | if(W==8) continue; \ | 84 | 3.13M | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 332k | } |
h264dsp.c:biweight_h264_pixels4_12_c Line | Count | Source | 64 | 414k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 414k | { \ | 66 | 414k | int y; \ | 67 | 414k | pixel *dst = (pixel*)_dst; \ | 68 | 414k | pixel *src = (pixel*)_src; \ | 69 | 414k | stride >>= sizeof(pixel)-1; \ | 70 | 414k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 414k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 2.93M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 2.51M | op_scale2(0); \ | 74 | 2.51M | op_scale2(1); \ | 75 | 2.51M | if(W==2) continue; \ | 76 | 2.51M | op_scale2(2); \ | 77 | 2.51M | op_scale2(3); \ | 78 | 2.51M | if(W==4) continue; \ | 79 | 2.51M | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 414k | } |
h264dsp.c:biweight_h264_pixels2_12_c Line | Count | Source | 64 | 190k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 190k | { \ | 66 | 190k | int y; \ | 67 | 190k | pixel *dst = (pixel*)_dst; \ | 68 | 190k | pixel *src = (pixel*)_src; \ | 69 | 190k | stride >>= sizeof(pixel)-1; \ | 70 | 190k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 190k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 872k | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 681k | op_scale2(0); \ | 74 | 681k | op_scale2(1); \ | 75 | 681k | if(W==2) continue; \ | 76 | 681k | op_scale2(2); \ | 77 | 0 | op_scale2(3); \ | 78 | 0 | if(W==4) continue; \ | 79 | 0 | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 190k | } |
h264dsp.c:biweight_h264_pixels16_14_c Line | Count | Source | 64 | 66.8k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 66.8k | { \ | 66 | 66.8k | int y; \ | 67 | 66.8k | pixel *dst = (pixel*)_dst; \ | 68 | 66.8k | pixel *src = (pixel*)_src; \ | 69 | 66.8k | stride >>= sizeof(pixel)-1; \ | 70 | 66.8k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 66.8k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 1.01M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 945k | op_scale2(0); \ | 74 | 945k | op_scale2(1); \ | 75 | 945k | if(W==2) continue; \ | 76 | 945k | op_scale2(2); \ | 77 | 945k | op_scale2(3); \ | 78 | 945k | if(W==4) continue; \ | 79 | 945k | op_scale2(4); \ | 80 | 945k | op_scale2(5); \ | 81 | 945k | op_scale2(6); \ | 82 | 945k | op_scale2(7); \ | 83 | 945k | if(W==8) continue; \ | 84 | 945k | op_scale2(8); \ | 85 | 945k | op_scale2(9); \ | 86 | 945k | op_scale2(10); \ | 87 | 945k | op_scale2(11); \ | 88 | 945k | op_scale2(12); \ | 89 | 945k | op_scale2(13); \ | 90 | 945k | op_scale2(14); \ | 91 | 945k | op_scale2(15); \ | 92 | 945k | } \ | 93 | 66.8k | } |
h264dsp.c:biweight_h264_pixels8_14_c Line | Count | Source | 64 | 469k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 469k | { \ | 66 | 469k | int y; \ | 67 | 469k | pixel *dst = (pixel*)_dst; \ | 68 | 469k | pixel *src = (pixel*)_src; \ | 69 | 469k | stride >>= sizeof(pixel)-1; \ | 70 | 469k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 469k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 5.10M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 4.63M | op_scale2(0); \ | 74 | 4.63M | op_scale2(1); \ | 75 | 4.63M | if(W==2) continue; \ | 76 | 4.63M | op_scale2(2); \ | 77 | 4.63M | op_scale2(3); \ | 78 | 4.63M | if(W==4) continue; \ | 79 | 4.63M | op_scale2(4); \ | 80 | 4.63M | op_scale2(5); \ | 81 | 4.63M | op_scale2(6); \ | 82 | 4.63M | op_scale2(7); \ | 83 | 4.63M | if(W==8) continue; \ | 84 | 4.63M | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 469k | } |
h264dsp.c:biweight_h264_pixels4_14_c Line | Count | Source | 64 | 920k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 920k | { \ | 66 | 920k | int y; \ | 67 | 920k | pixel *dst = (pixel*)_dst; \ | 68 | 920k | pixel *src = (pixel*)_src; \ | 69 | 920k | stride >>= sizeof(pixel)-1; \ | 70 | 920k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 920k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 7.05M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 6.13M | op_scale2(0); \ | 74 | 6.13M | op_scale2(1); \ | 75 | 6.13M | if(W==2) continue; \ | 76 | 6.13M | op_scale2(2); \ | 77 | 6.13M | op_scale2(3); \ | 78 | 6.13M | if(W==4) continue; \ | 79 | 6.13M | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 920k | } |
h264dsp.c:biweight_h264_pixels2_14_c Line | Count | Source | 64 | 687k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 687k | { \ | 66 | 687k | int y; \ | 67 | 687k | pixel *dst = (pixel*)_dst; \ | 68 | 687k | pixel *src = (pixel*)_src; \ | 69 | 687k | stride >>= sizeof(pixel)-1; \ | 70 | 687k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 687k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 2.62M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 1.93M | op_scale2(0); \ | 74 | 1.93M | op_scale2(1); \ | 75 | 1.93M | if(W==2) continue; \ | 76 | 1.93M | op_scale2(2); \ | 77 | 0 | op_scale2(3); \ | 78 | 0 | if(W==4) continue; \ | 79 | 0 | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 687k | } |
h264dsp.c:biweight_h264_pixels16_8_c Line | Count | Source | 64 | 207k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 207k | { \ | 66 | 207k | int y; \ | 67 | 207k | pixel *dst = (pixel*)_dst; \ | 68 | 207k | pixel *src = (pixel*)_src; \ | 69 | 207k | stride >>= sizeof(pixel)-1; \ | 70 | 207k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 207k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 3.40M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 3.19M | op_scale2(0); \ | 74 | 3.19M | op_scale2(1); \ | 75 | 3.19M | if(W==2) continue; \ | 76 | 3.19M | op_scale2(2); \ | 77 | 3.19M | op_scale2(3); \ | 78 | 3.19M | if(W==4) continue; \ | 79 | 3.19M | op_scale2(4); \ | 80 | 3.19M | op_scale2(5); \ | 81 | 3.19M | op_scale2(6); \ | 82 | 3.19M | op_scale2(7); \ | 83 | 3.19M | if(W==8) continue; \ | 84 | 3.19M | op_scale2(8); \ | 85 | 3.19M | op_scale2(9); \ | 86 | 3.19M | op_scale2(10); \ | 87 | 3.19M | op_scale2(11); \ | 88 | 3.19M | op_scale2(12); \ | 89 | 3.19M | op_scale2(13); \ | 90 | 3.19M | op_scale2(14); \ | 91 | 3.19M | op_scale2(15); \ | 92 | 3.19M | } \ | 93 | 207k | } |
h264dsp.c:biweight_h264_pixels8_8_c Line | Count | Source | 64 | 1.06M | int log2_denom, int weightd, int weights, int offset) \ | 65 | 1.06M | { \ | 66 | 1.06M | int y; \ | 67 | 1.06M | pixel *dst = (pixel*)_dst; \ | 68 | 1.06M | pixel *src = (pixel*)_src; \ | 69 | 1.06M | stride >>= sizeof(pixel)-1; \ | 70 | 1.06M | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 1.06M | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 10.4M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 9.38M | op_scale2(0); \ | 74 | 9.38M | op_scale2(1); \ | 75 | 9.38M | if(W==2) continue; \ | 76 | 9.38M | op_scale2(2); \ | 77 | 9.38M | op_scale2(3); \ | 78 | 9.38M | if(W==4) continue; \ | 79 | 9.38M | op_scale2(4); \ | 80 | 9.38M | op_scale2(5); \ | 81 | 9.38M | op_scale2(6); \ | 82 | 9.38M | op_scale2(7); \ | 83 | 9.38M | if(W==8) continue; \ | 84 | 9.38M | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 1.06M | } |
h264dsp.c:biweight_h264_pixels4_8_c Line | Count | Source | 64 | 945k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 945k | { \ | 66 | 945k | int y; \ | 67 | 945k | pixel *dst = (pixel*)_dst; \ | 68 | 945k | pixel *src = (pixel*)_src; \ | 69 | 945k | stride >>= sizeof(pixel)-1; \ | 70 | 945k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 945k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 6.04M | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 5.09M | op_scale2(0); \ | 74 | 5.09M | op_scale2(1); \ | 75 | 5.09M | if(W==2) continue; \ | 76 | 5.09M | op_scale2(2); \ | 77 | 5.09M | op_scale2(3); \ | 78 | 5.09M | if(W==4) continue; \ | 79 | 5.09M | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 945k | } |
h264dsp.c:biweight_h264_pixels2_8_c Line | Count | Source | 64 | 186k | int log2_denom, int weightd, int weights, int offset) \ | 65 | 186k | { \ | 66 | 186k | int y; \ | 67 | 186k | pixel *dst = (pixel*)_dst; \ | 68 | 186k | pixel *src = (pixel*)_src; \ | 69 | 186k | stride >>= sizeof(pixel)-1; \ | 70 | 186k | offset = (unsigned)offset << (BIT_DEPTH-8); \ | 71 | 186k | offset = (unsigned)((offset + 1) | 1) << log2_denom; \ | 72 | 598k | for (y = 0; y < height; y++, dst += stride, src += stride) { \ | 73 | 411k | op_scale2(0); \ | 74 | 411k | op_scale2(1); \ | 75 | 411k | if(W==2) continue; \ | 76 | 411k | op_scale2(2); \ | 77 | 0 | op_scale2(3); \ | 78 | 0 | if(W==4) continue; \ | 79 | 0 | op_scale2(4); \ | 80 | 0 | op_scale2(5); \ | 81 | 0 | op_scale2(6); \ | 82 | 0 | op_scale2(7); \ | 83 | 0 | if(W==8) continue; \ | 84 | 0 | op_scale2(8); \ | 85 | 0 | op_scale2(9); \ | 86 | 0 | op_scale2(10); \ | 87 | 0 | op_scale2(11); \ | 88 | 0 | op_scale2(12); \ | 89 | 0 | op_scale2(13); \ | 90 | 0 | op_scale2(14); \ | 91 | 0 | op_scale2(15); \ | 92 | 0 | } \ | 93 | 186k | } |
|
94 | | |
95 | | H264_WEIGHT(16) |
96 | | H264_WEIGHT(8) |
97 | | H264_WEIGHT(4) |
98 | | H264_WEIGHT(2) |
99 | | |
100 | | #undef op_scale1 |
101 | | #undef op_scale2 |
102 | | #undef H264_WEIGHT |
103 | | |
104 | | static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta, int8_t *tc0) |
105 | 44.5M | { |
106 | 44.5M | pixel *pix = (pixel*)p_pix; |
107 | 44.5M | int i, d; |
108 | 44.5M | xstride >>= sizeof(pixel)-1; |
109 | 44.5M | ystride >>= sizeof(pixel)-1; |
110 | 44.5M | alpha <<= BIT_DEPTH - 8; |
111 | 44.5M | beta <<= BIT_DEPTH - 8; |
112 | 222M | for( i = 0; i < 4; i++ ) { |
113 | 178M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); |
114 | 178M | if( tc_orig < 0 ) { |
115 | 22.2M | pix += inner_iters*ystride; |
116 | 22.2M | continue; |
117 | 22.2M | } |
118 | 762M | for( d = 0; d < inner_iters; d++ ) { |
119 | 606M | const int p0 = pix[-1*xstride]; |
120 | 606M | const int p1 = pix[-2*xstride]; |
121 | 606M | const int p2 = pix[-3*xstride]; |
122 | 606M | const int q0 = pix[0]; |
123 | 606M | const int q1 = pix[1*xstride]; |
124 | 606M | const int q2 = pix[2*xstride]; |
125 | | |
126 | 606M | if( FFABS( p0 - q0 ) < alpha && |
127 | 571M | FFABS( p1 - p0 ) < beta && |
128 | 532M | FFABS( q1 - q0 ) < beta ) { |
129 | | |
130 | 501M | int tc = tc_orig; |
131 | 501M | int i_delta; |
132 | | |
133 | 501M | if( FFABS( p2 - p0 ) < beta ) { |
134 | 484M | if(tc_orig) |
135 | 466M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); |
136 | 484M | tc++; |
137 | 484M | } |
138 | 501M | if( FFABS( q2 - q0 ) < beta ) { |
139 | 484M | if(tc_orig) |
140 | 466M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); |
141 | 484M | tc++; |
142 | 484M | } |
143 | | |
144 | 501M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); |
145 | 501M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ |
146 | 501M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ |
147 | 501M | } |
148 | 606M | pix += ystride; |
149 | 606M | } |
150 | 155M | } |
151 | 44.5M | } h264dsp.c:h264_loop_filter_luma_9_c Line | Count | Source | 105 | 7.30M | { | 106 | 7.30M | pixel *pix = (pixel*)p_pix; | 107 | 7.30M | int i, d; | 108 | 7.30M | xstride >>= sizeof(pixel)-1; | 109 | 7.30M | ystride >>= sizeof(pixel)-1; | 110 | 7.30M | alpha <<= BIT_DEPTH - 8; | 111 | 7.30M | beta <<= BIT_DEPTH - 8; | 112 | 36.5M | for( i = 0; i < 4; i++ ) { | 113 | 29.2M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); | 114 | 29.2M | if( tc_orig < 0 ) { | 115 | 3.95M | pix += inner_iters*ystride; | 116 | 3.95M | continue; | 117 | 3.95M | } | 118 | 123M | for( d = 0; d < inner_iters; d++ ) { | 119 | 98.4M | const int p0 = pix[-1*xstride]; | 120 | 98.4M | const int p1 = pix[-2*xstride]; | 121 | 98.4M | const int p2 = pix[-3*xstride]; | 122 | 98.4M | const int q0 = pix[0]; | 123 | 98.4M | const int q1 = pix[1*xstride]; | 124 | 98.4M | const int q2 = pix[2*xstride]; | 125 | | | 126 | 98.4M | if( FFABS( p0 - q0 ) < alpha && | 127 | 90.6M | FFABS( p1 - p0 ) < beta && | 128 | 84.6M | FFABS( q1 - q0 ) < beta ) { | 129 | | | 130 | 78.8M | int tc = tc_orig; | 131 | 78.8M | int i_delta; | 132 | | | 133 | 78.8M | if( FFABS( p2 - p0 ) < beta ) { | 134 | 76.2M | if(tc_orig) | 135 | 74.5M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); | 136 | 76.2M | tc++; | 137 | 76.2M | } | 138 | 78.8M | if( FFABS( q2 - q0 ) < beta ) { | 139 | 76.2M | if(tc_orig) | 140 | 74.5M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); | 141 | 76.2M | tc++; | 142 | 76.2M | } | 143 | | | 144 | 78.8M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); | 145 | 78.8M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ | 146 | 78.8M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ | 147 | 78.8M | } | 148 | 98.4M | pix += ystride; | 149 | 98.4M | } | 150 | 25.2M | } | 151 | 7.30M | } |
h264dsp.c:h264_loop_filter_luma_10_c Line | Count | Source | 105 | 10.7M | { | 106 | 10.7M | pixel *pix = (pixel*)p_pix; | 107 | 10.7M | int i, d; | 108 | 10.7M | xstride >>= sizeof(pixel)-1; | 109 | 10.7M | ystride >>= sizeof(pixel)-1; | 110 | 10.7M | alpha <<= BIT_DEPTH - 8; | 111 | 10.7M | beta <<= BIT_DEPTH - 8; | 112 | 53.9M | for( i = 0; i < 4; i++ ) { | 113 | 43.1M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); | 114 | 43.1M | if( tc_orig < 0 ) { | 115 | 5.02M | pix += inner_iters*ystride; | 116 | 5.02M | continue; | 117 | 5.02M | } | 118 | 184M | for( d = 0; d < inner_iters; d++ ) { | 119 | 146M | const int p0 = pix[-1*xstride]; | 120 | 146M | const int p1 = pix[-2*xstride]; | 121 | 146M | const int p2 = pix[-3*xstride]; | 122 | 146M | const int q0 = pix[0]; | 123 | 146M | const int q1 = pix[1*xstride]; | 124 | 146M | const int q2 = pix[2*xstride]; | 125 | | | 126 | 146M | if( FFABS( p0 - q0 ) < alpha && | 127 | 136M | FFABS( p1 - p0 ) < beta && | 128 | 127M | FFABS( q1 - q0 ) < beta ) { | 129 | | | 130 | 118M | int tc = tc_orig; | 131 | 118M | int i_delta; | 132 | | | 133 | 118M | if( FFABS( p2 - p0 ) < beta ) { | 134 | 114M | if(tc_orig) | 135 | 109M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); | 136 | 114M | tc++; | 137 | 114M | } | 138 | 118M | if( FFABS( q2 - q0 ) < beta ) { | 139 | 114M | if(tc_orig) | 140 | 110M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); | 141 | 114M | tc++; | 142 | 114M | } | 143 | | | 144 | 118M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); | 145 | 118M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ | 146 | 118M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ | 147 | 118M | } | 148 | 146M | pix += ystride; | 149 | 146M | } | 150 | 38.1M | } | 151 | 10.7M | } |
h264dsp.c:h264_loop_filter_luma_12_c Line | Count | Source | 105 | 5.80M | { | 106 | 5.80M | pixel *pix = (pixel*)p_pix; | 107 | 5.80M | int i, d; | 108 | 5.80M | xstride >>= sizeof(pixel)-1; | 109 | 5.80M | ystride >>= sizeof(pixel)-1; | 110 | 5.80M | alpha <<= BIT_DEPTH - 8; | 111 | 5.80M | beta <<= BIT_DEPTH - 8; | 112 | 29.0M | for( i = 0; i < 4; i++ ) { | 113 | 23.2M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); | 114 | 23.2M | if( tc_orig < 0 ) { | 115 | 3.50M | pix += inner_iters*ystride; | 116 | 3.50M | continue; | 117 | 3.50M | } | 118 | 94.4M | for( d = 0; d < inner_iters; d++ ) { | 119 | 74.7M | const int p0 = pix[-1*xstride]; | 120 | 74.7M | const int p1 = pix[-2*xstride]; | 121 | 74.7M | const int p2 = pix[-3*xstride]; | 122 | 74.7M | const int q0 = pix[0]; | 123 | 74.7M | const int q1 = pix[1*xstride]; | 124 | 74.7M | const int q2 = pix[2*xstride]; | 125 | | | 126 | 74.7M | if( FFABS( p0 - q0 ) < alpha && | 127 | 69.7M | FFABS( p1 - p0 ) < beta && | 128 | 64.7M | FFABS( q1 - q0 ) < beta ) { | 129 | | | 130 | 60.8M | int tc = tc_orig; | 131 | 60.8M | int i_delta; | 132 | | | 133 | 60.8M | if( FFABS( p2 - p0 ) < beta ) { | 134 | 58.4M | if(tc_orig) | 135 | 54.2M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); | 136 | 58.4M | tc++; | 137 | 58.4M | } | 138 | 60.8M | if( FFABS( q2 - q0 ) < beta ) { | 139 | 58.3M | if(tc_orig) | 140 | 54.1M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); | 141 | 58.3M | tc++; | 142 | 58.3M | } | 143 | | | 144 | 60.8M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); | 145 | 60.8M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ | 146 | 60.8M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ | 147 | 60.8M | } | 148 | 74.7M | pix += ystride; | 149 | 74.7M | } | 150 | 19.7M | } | 151 | 5.80M | } |
h264dsp.c:h264_loop_filter_luma_14_c Line | Count | Source | 105 | 4.74M | { | 106 | 4.74M | pixel *pix = (pixel*)p_pix; | 107 | 4.74M | int i, d; | 108 | 4.74M | xstride >>= sizeof(pixel)-1; | 109 | 4.74M | ystride >>= sizeof(pixel)-1; | 110 | 4.74M | alpha <<= BIT_DEPTH - 8; | 111 | 4.74M | beta <<= BIT_DEPTH - 8; | 112 | 23.7M | for( i = 0; i < 4; i++ ) { | 113 | 18.9M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); | 114 | 18.9M | if( tc_orig < 0 ) { | 115 | 2.04M | pix += inner_iters*ystride; | 116 | 2.04M | continue; | 117 | 2.04M | } | 118 | 81.3M | for( d = 0; d < inner_iters; d++ ) { | 119 | 64.4M | const int p0 = pix[-1*xstride]; | 120 | 64.4M | const int p1 = pix[-2*xstride]; | 121 | 64.4M | const int p2 = pix[-3*xstride]; | 122 | 64.4M | const int q0 = pix[0]; | 123 | 64.4M | const int q1 = pix[1*xstride]; | 124 | 64.4M | const int q2 = pix[2*xstride]; | 125 | | | 126 | 64.4M | if( FFABS( p0 - q0 ) < alpha && | 127 | 60.8M | FFABS( p1 - p0 ) < beta && | 128 | 57.8M | FFABS( q1 - q0 ) < beta ) { | 129 | | | 130 | 56.0M | int tc = tc_orig; | 131 | 56.0M | int i_delta; | 132 | | | 133 | 56.0M | if( FFABS( p2 - p0 ) < beta ) { | 134 | 54.6M | if(tc_orig) | 135 | 52.4M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); | 136 | 54.6M | tc++; | 137 | 54.6M | } | 138 | 56.0M | if( FFABS( q2 - q0 ) < beta ) { | 139 | 54.9M | if(tc_orig) | 140 | 52.7M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); | 141 | 54.9M | tc++; | 142 | 54.9M | } | 143 | | | 144 | 56.0M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); | 145 | 56.0M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ | 146 | 56.0M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ | 147 | 56.0M | } | 148 | 64.4M | pix += ystride; | 149 | 64.4M | } | 150 | 16.9M | } | 151 | 4.74M | } |
h264dsp.c:h264_loop_filter_luma_8_c Line | Count | Source | 105 | 15.9M | { | 106 | 15.9M | pixel *pix = (pixel*)p_pix; | 107 | 15.9M | int i, d; | 108 | 15.9M | xstride >>= sizeof(pixel)-1; | 109 | 15.9M | ystride >>= sizeof(pixel)-1; | 110 | 15.9M | alpha <<= BIT_DEPTH - 8; | 111 | 15.9M | beta <<= BIT_DEPTH - 8; | 112 | 79.6M | for( i = 0; i < 4; i++ ) { | 113 | 63.7M | const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8)); | 114 | 63.7M | if( tc_orig < 0 ) { | 115 | 7.71M | pix += inner_iters*ystride; | 116 | 7.71M | continue; | 117 | 7.71M | } | 118 | 278M | for( d = 0; d < inner_iters; d++ ) { | 119 | 222M | const int p0 = pix[-1*xstride]; | 120 | 222M | const int p1 = pix[-2*xstride]; | 121 | 222M | const int p2 = pix[-3*xstride]; | 122 | 222M | const int q0 = pix[0]; | 123 | 222M | const int q1 = pix[1*xstride]; | 124 | 222M | const int q2 = pix[2*xstride]; | 125 | | | 126 | 222M | if( FFABS( p0 - q0 ) < alpha && | 127 | 213M | FFABS( p1 - p0 ) < beta && | 128 | 197M | FFABS( q1 - q0 ) < beta ) { | 129 | | | 130 | 187M | int tc = tc_orig; | 131 | 187M | int i_delta; | 132 | | | 133 | 187M | if( FFABS( p2 - p0 ) < beta ) { | 134 | 180M | if(tc_orig) | 135 | 174M | pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); | 136 | 180M | tc++; | 137 | 180M | } | 138 | 187M | if( FFABS( q2 - q0 ) < beta ) { | 139 | 180M | if(tc_orig) | 140 | 174M | pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); | 141 | 180M | tc++; | 142 | 180M | } | 143 | | | 144 | 187M | i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc ); | 145 | 187M | pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ | 146 | 187M | pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ | 147 | 187M | } | 148 | 222M | pix += ystride; | 149 | 222M | } | 150 | 56.0M | } | 151 | 15.9M | } |
|
152 | | static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
153 | 21.9M | { |
154 | 21.9M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); |
155 | 21.9M | } h264dsp.c:h264_v_loop_filter_luma_9_c Line | Count | Source | 153 | 3.54M | { | 154 | 3.54M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); | 155 | 3.54M | } |
h264dsp.c:h264_v_loop_filter_luma_10_c Line | Count | Source | 153 | 5.36M | { | 154 | 5.36M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); | 155 | 5.36M | } |
h264dsp.c:h264_v_loop_filter_luma_12_c Line | Count | Source | 153 | 2.65M | { | 154 | 2.65M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); | 155 | 2.65M | } |
h264dsp.c:h264_v_loop_filter_luma_14_c Line | Count | Source | 153 | 2.33M | { | 154 | 2.33M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); | 155 | 2.33M | } |
h264dsp.c:h264_v_loop_filter_luma_8_c Line | Count | Source | 153 | 7.99M | { | 154 | 7.99M | FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); | 155 | 7.99M | } |
|
156 | | static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
157 | 20.4M | { |
158 | 20.4M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); |
159 | 20.4M | } h264dsp.c:h264_h_loop_filter_luma_9_c Line | Count | Source | 157 | 3.42M | { | 158 | 3.42M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 159 | 3.42M | } |
h264dsp.c:h264_h_loop_filter_luma_10_c Line | Count | Source | 157 | 4.66M | { | 158 | 4.66M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 159 | 4.66M | } |
h264dsp.c:h264_h_loop_filter_luma_12_c Line | Count | Source | 157 | 2.63M | { | 158 | 2.63M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 159 | 2.63M | } |
h264dsp.c:h264_h_loop_filter_luma_14_c Line | Count | Source | 157 | 2.00M | { | 158 | 2.00M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 159 | 2.00M | } |
h264dsp.c:h264_h_loop_filter_luma_8_c Line | Count | Source | 157 | 7.70M | { | 158 | 7.70M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 159 | 7.70M | } |
|
160 | | static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
161 | 2.21M | { |
162 | 2.21M | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); |
163 | 2.21M | } h264dsp.c:h264_h_loop_filter_luma_mbaff_9_c Line | Count | Source | 161 | 323k | { | 162 | 323k | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 163 | 323k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_10_c Line | Count | Source | 161 | 758k | { | 162 | 758k | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 163 | 758k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_12_c Line | Count | Source | 161 | 508k | { | 162 | 508k | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 163 | 508k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_14_c Line | Count | Source | 161 | 398k | { | 162 | 398k | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 163 | 398k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_8_c Line | Count | Source | 161 | 228k | { | 162 | 228k | FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 163 | 228k | } |
|
164 | | |
165 | | static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta) |
166 | 4.47M | { |
167 | 4.47M | pixel *pix = (pixel*)p_pix; |
168 | 4.47M | int d; |
169 | 4.47M | xstride >>= sizeof(pixel)-1; |
170 | 4.47M | ystride >>= sizeof(pixel)-1; |
171 | 4.47M | alpha <<= BIT_DEPTH - 8; |
172 | 4.47M | beta <<= BIT_DEPTH - 8; |
173 | 73.8M | for( d = 0; d < 4 * inner_iters; d++ ) { |
174 | 69.3M | const int p2 = pix[-3*xstride]; |
175 | 69.3M | const int p1 = pix[-2*xstride]; |
176 | 69.3M | const int p0 = pix[-1*xstride]; |
177 | | |
178 | 69.3M | const int q0 = pix[ 0*xstride]; |
179 | 69.3M | const int q1 = pix[ 1*xstride]; |
180 | 69.3M | const int q2 = pix[ 2*xstride]; |
181 | | |
182 | 69.3M | if( FFABS( p0 - q0 ) < alpha && |
183 | 65.1M | FFABS( p1 - p0 ) < beta && |
184 | 61.0M | FFABS( q1 - q0 ) < beta ) { |
185 | | |
186 | 59.0M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ |
187 | 55.1M | if( FFABS( p2 - p0 ) < beta) |
188 | 53.9M | { |
189 | 53.9M | const int p3 = pix[-4*xstride]; |
190 | | /* p0', p1', p2' */ |
191 | 53.9M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; |
192 | 53.9M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; |
193 | 53.9M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; |
194 | 53.9M | } else { |
195 | | /* p0' */ |
196 | 1.20M | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; |
197 | 1.20M | } |
198 | 55.1M | if( FFABS( q2 - q0 ) < beta) |
199 | 53.9M | { |
200 | 53.9M | const int q3 = pix[3*xstride]; |
201 | | /* q0', q1', q2' */ |
202 | 53.9M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; |
203 | 53.9M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; |
204 | 53.9M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; |
205 | 53.9M | } else { |
206 | | /* q0' */ |
207 | 1.14M | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; |
208 | 1.14M | } |
209 | 55.1M | }else{ |
210 | | /* p0', q0' */ |
211 | 3.96M | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; |
212 | 3.96M | pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; |
213 | 3.96M | } |
214 | 59.0M | } |
215 | 69.3M | pix += ystride; |
216 | 69.3M | } |
217 | 4.47M | } h264dsp.c:h264_loop_filter_luma_intra_9_c Line | Count | Source | 166 | 700k | { | 167 | 700k | pixel *pix = (pixel*)p_pix; | 168 | 700k | int d; | 169 | 700k | xstride >>= sizeof(pixel)-1; | 170 | 700k | ystride >>= sizeof(pixel)-1; | 171 | 700k | alpha <<= BIT_DEPTH - 8; | 172 | 700k | beta <<= BIT_DEPTH - 8; | 173 | 11.3M | for( d = 0; d < 4 * inner_iters; d++ ) { | 174 | 10.6M | const int p2 = pix[-3*xstride]; | 175 | 10.6M | const int p1 = pix[-2*xstride]; | 176 | 10.6M | const int p0 = pix[-1*xstride]; | 177 | | | 178 | 10.6M | const int q0 = pix[ 0*xstride]; | 179 | 10.6M | const int q1 = pix[ 1*xstride]; | 180 | 10.6M | const int q2 = pix[ 2*xstride]; | 181 | | | 182 | 10.6M | if( FFABS( p0 - q0 ) < alpha && | 183 | 9.22M | FFABS( p1 - p0 ) < beta && | 184 | 8.01M | FFABS( q1 - q0 ) < beta ) { | 185 | | | 186 | 7.73M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | 187 | 7.06M | if( FFABS( p2 - p0 ) < beta) | 188 | 6.94M | { | 189 | 6.94M | const int p3 = pix[-4*xstride]; | 190 | | /* p0', p1', p2' */ | 191 | 6.94M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | 192 | 6.94M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | 193 | 6.94M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | 194 | 6.94M | } else { | 195 | | /* p0' */ | 196 | 120k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 197 | 120k | } | 198 | 7.06M | if( FFABS( q2 - q0 ) < beta) | 199 | 6.94M | { | 200 | 6.94M | const int q3 = pix[3*xstride]; | 201 | | /* q0', q1', q2' */ | 202 | 6.94M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | 203 | 6.94M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | 204 | 6.94M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | 205 | 6.94M | } else { | 206 | | /* q0' */ | 207 | 120k | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 208 | 120k | } | 209 | 7.06M | }else{ | 210 | | /* p0', q0' */ | 211 | 667k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 212 | 667k | pix[ 
0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 213 | 667k | } | 214 | 7.73M | } | 215 | 10.6M | pix += ystride; | 216 | 10.6M | } | 217 | 700k | } |
h264dsp.c:h264_loop_filter_luma_intra_10_c Line | Count | Source | 166 | 612k | { | 167 | 612k | pixel *pix = (pixel*)p_pix; | 168 | 612k | int d; | 169 | 612k | xstride >>= sizeof(pixel)-1; | 170 | 612k | ystride >>= sizeof(pixel)-1; | 171 | 612k | alpha <<= BIT_DEPTH - 8; | 172 | 612k | beta <<= BIT_DEPTH - 8; | 173 | 9.30M | for( d = 0; d < 4 * inner_iters; d++ ) { | 174 | 8.68M | const int p2 = pix[-3*xstride]; | 175 | 8.68M | const int p1 = pix[-2*xstride]; | 176 | 8.68M | const int p0 = pix[-1*xstride]; | 177 | | | 178 | 8.68M | const int q0 = pix[ 0*xstride]; | 179 | 8.68M | const int q1 = pix[ 1*xstride]; | 180 | 8.68M | const int q2 = pix[ 2*xstride]; | 181 | | | 182 | 8.68M | if( FFABS( p0 - q0 ) < alpha && | 183 | 8.37M | FFABS( p1 - p0 ) < beta && | 184 | 8.05M | FFABS( q1 - q0 ) < beta ) { | 185 | | | 186 | 7.85M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | 187 | 7.40M | if( FFABS( p2 - p0 ) < beta) | 188 | 7.32M | { | 189 | 7.32M | const int p3 = pix[-4*xstride]; | 190 | | /* p0', p1', p2' */ | 191 | 7.32M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | 192 | 7.32M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | 193 | 7.32M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | 194 | 7.32M | } else { | 195 | | /* p0' */ | 196 | 79.6k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 197 | 79.6k | } | 198 | 7.40M | if( FFABS( q2 - q0 ) < beta) | 199 | 7.30M | { | 200 | 7.30M | const int q3 = pix[3*xstride]; | 201 | | /* q0', q1', q2' */ | 202 | 7.30M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | 203 | 7.30M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | 204 | 7.30M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | 205 | 7.30M | } else { | 206 | | /* q0' */ | 207 | 96.2k | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 208 | 96.2k | } | 209 | 7.40M | }else{ | 210 | | /* p0', q0' */ | 211 | 449k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 212 | 449k | pix[ 0*xstride] = ( 
2*q1 + q0 + p1 + 2 ) >> 2; | 213 | 449k | } | 214 | 7.85M | } | 215 | 8.68M | pix += ystride; | 216 | 8.68M | } | 217 | 612k | } |
h264dsp.c:h264_loop_filter_luma_intra_12_c Line | Count | Source | 166 | 220k | { | 167 | 220k | pixel *pix = (pixel*)p_pix; | 168 | 220k | int d; | 169 | 220k | xstride >>= sizeof(pixel)-1; | 170 | 220k | ystride >>= sizeof(pixel)-1; | 171 | 220k | alpha <<= BIT_DEPTH - 8; | 172 | 220k | beta <<= BIT_DEPTH - 8; | 173 | 3.48M | for( d = 0; d < 4 * inner_iters; d++ ) { | 174 | 3.26M | const int p2 = pix[-3*xstride]; | 175 | 3.26M | const int p1 = pix[-2*xstride]; | 176 | 3.26M | const int p0 = pix[-1*xstride]; | 177 | | | 178 | 3.26M | const int q0 = pix[ 0*xstride]; | 179 | 3.26M | const int q1 = pix[ 1*xstride]; | 180 | 3.26M | const int q2 = pix[ 2*xstride]; | 181 | | | 182 | 3.26M | if( FFABS( p0 - q0 ) < alpha && | 183 | 2.97M | FFABS( p1 - p0 ) < beta && | 184 | 2.76M | FFABS( q1 - q0 ) < beta ) { | 185 | | | 186 | 2.57M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | 187 | 2.29M | if( FFABS( p2 - p0 ) < beta) | 188 | 2.17M | { | 189 | 2.17M | const int p3 = pix[-4*xstride]; | 190 | | /* p0', p1', p2' */ | 191 | 2.17M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | 192 | 2.17M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | 193 | 2.17M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | 194 | 2.17M | } else { | 195 | | /* p0' */ | 196 | 118k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 197 | 118k | } | 198 | 2.29M | if( FFABS( q2 - q0 ) < beta) | 199 | 2.16M | { | 200 | 2.16M | const int q3 = pix[3*xstride]; | 201 | | /* q0', q1', q2' */ | 202 | 2.16M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | 203 | 2.16M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | 204 | 2.16M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | 205 | 2.16M | } else { | 206 | | /* q0' */ | 207 | 124k | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 208 | 124k | } | 209 | 2.29M | }else{ | 210 | | /* p0', q0' */ | 211 | 282k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 212 | 282k | pix[ 0*xstride] = ( 2*q1 + 
q0 + p1 + 2 ) >> 2; | 213 | 282k | } | 214 | 2.57M | } | 215 | 3.26M | pix += ystride; | 216 | 3.26M | } | 217 | 220k | } |
h264dsp.c:h264_loop_filter_luma_intra_14_c Line | Count | Source | 166 | 374k | { | 167 | 374k | pixel *pix = (pixel*)p_pix; | 168 | 374k | int d; | 169 | 374k | xstride >>= sizeof(pixel)-1; | 170 | 374k | ystride >>= sizeof(pixel)-1; | 171 | 374k | alpha <<= BIT_DEPTH - 8; | 172 | 374k | beta <<= BIT_DEPTH - 8; | 173 | 6.23M | for( d = 0; d < 4 * inner_iters; d++ ) { | 174 | 5.85M | const int p2 = pix[-3*xstride]; | 175 | 5.85M | const int p1 = pix[-2*xstride]; | 176 | 5.85M | const int p0 = pix[-1*xstride]; | 177 | | | 178 | 5.85M | const int q0 = pix[ 0*xstride]; | 179 | 5.85M | const int q1 = pix[ 1*xstride]; | 180 | 5.85M | const int q2 = pix[ 2*xstride]; | 181 | | | 182 | 5.85M | if( FFABS( p0 - q0 ) < alpha && | 183 | 5.61M | FFABS( p1 - p0 ) < beta && | 184 | 4.87M | FFABS( q1 - q0 ) < beta ) { | 185 | | | 186 | 4.45M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | 187 | 4.19M | if( FFABS( p2 - p0 ) < beta) | 188 | 3.94M | { | 189 | 3.94M | const int p3 = pix[-4*xstride]; | 190 | | /* p0', p1', p2' */ | 191 | 3.94M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | 192 | 3.94M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | 193 | 3.94M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | 194 | 3.94M | } else { | 195 | | /* p0' */ | 196 | 250k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 197 | 250k | } | 198 | 4.19M | if( FFABS( q2 - q0 ) < beta) | 199 | 4.08M | { | 200 | 4.08M | const int q3 = pix[3*xstride]; | 201 | | /* q0', q1', q2' */ | 202 | 4.08M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | 203 | 4.08M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | 204 | 4.08M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | 205 | 4.08M | } else { | 206 | | /* q0' */ | 207 | 108k | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 208 | 108k | } | 209 | 4.19M | }else{ | 210 | | /* p0', q0' */ | 211 | 259k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 212 | 259k | pix[ 0*xstride] = ( 2*q1 + 
q0 + p1 + 2 ) >> 2; | 213 | 259k | } | 214 | 4.45M | } | 215 | 5.85M | pix += ystride; | 216 | 5.85M | } | 217 | 374k | } |
h264dsp.c:h264_loop_filter_luma_intra_8_c Line | Count | Source | 166 | 2.57M | { | 167 | 2.57M | pixel *pix = (pixel*)p_pix; | 168 | 2.57M | int d; | 169 | 2.57M | xstride >>= sizeof(pixel)-1; | 170 | 2.57M | ystride >>= sizeof(pixel)-1; | 171 | 2.57M | alpha <<= BIT_DEPTH - 8; | 172 | 2.57M | beta <<= BIT_DEPTH - 8; | 173 | 43.4M | for( d = 0; d < 4 * inner_iters; d++ ) { | 174 | 40.9M | const int p2 = pix[-3*xstride]; | 175 | 40.9M | const int p1 = pix[-2*xstride]; | 176 | 40.9M | const int p0 = pix[-1*xstride]; | 177 | | | 178 | 40.9M | const int q0 = pix[ 0*xstride]; | 179 | 40.9M | const int q1 = pix[ 1*xstride]; | 180 | 40.9M | const int q2 = pix[ 2*xstride]; | 181 | | | 182 | 40.9M | if( FFABS( p0 - q0 ) < alpha && | 183 | 39.0M | FFABS( p1 - p0 ) < beta && | 184 | 37.3M | FFABS( q1 - q0 ) < beta ) { | 185 | | | 186 | 36.4M | if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | 187 | 34.1M | if( FFABS( p2 - p0 ) < beta) | 188 | 33.5M | { | 189 | 33.5M | const int p3 = pix[-4*xstride]; | 190 | | /* p0', p1', p2' */ | 191 | 33.5M | pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | 192 | 33.5M | pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | 193 | 33.5M | pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | 194 | 33.5M | } else { | 195 | | /* p0' */ | 196 | 636k | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 197 | 636k | } | 198 | 34.1M | if( FFABS( q2 - q0 ) < beta) | 199 | 33.4M | { | 200 | 33.4M | const int q3 = pix[3*xstride]; | 201 | | /* q0', q1', q2' */ | 202 | 33.4M | pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | 203 | 33.4M | pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | 204 | 33.4M | pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | 205 | 33.4M | } else { | 206 | | /* q0' */ | 207 | 691k | pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | 208 | 691k | } | 209 | 34.1M | }else{ | 210 | | /* p0', q0' */ | 211 | 2.30M | pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | 212 | 2.30M | pix[ 0*xstride] = ( 
2*q1 + q0 + p1 + 2 ) >> 2; | 213 | 2.30M | } | 214 | 36.4M | } | 215 | 40.9M | pix += ystride; | 216 | 40.9M | } | 217 | 2.57M | } |
|
218 | | static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
219 | 1.85M | { |
220 | 1.85M | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); |
221 | 1.85M | } h264dsp.c:h264_v_loop_filter_luma_intra_9_c Line | Count | Source | 219 | 277k | { | 220 | 277k | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); | 221 | 277k | } |
h264dsp.c:h264_v_loop_filter_luma_intra_10_c Line | Count | Source | 219 | 173k | { | 220 | 173k | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); | 221 | 173k | } |
h264dsp.c:h264_v_loop_filter_luma_intra_12_c Line | Count | Source | 219 | 79.6k | { | 220 | 79.6k | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); | 221 | 79.6k | } |
h264dsp.c:h264_v_loop_filter_luma_intra_14_c Line | Count | Source | 219 | 149k | { | 220 | 149k | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); | 221 | 149k | } |
h264dsp.c:h264_v_loop_filter_luma_intra_8_c Line | Count | Source | 219 | 1.17M | { | 220 | 1.17M | FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); | 221 | 1.17M | } |
|
222 | | static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
223 | 2.33M | { |
224 | 2.33M | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); |
225 | 2.33M | } h264dsp.c:h264_h_loop_filter_luma_intra_9_c Line | Count | Source | 223 | 352k | { | 224 | 352k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 225 | 352k | } |
h264dsp.c:h264_h_loop_filter_luma_intra_10_c Line | Count | Source | 223 | 300k | { | 224 | 300k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 225 | 300k | } |
h264dsp.c:h264_h_loop_filter_luma_intra_12_c Line | Count | Source | 223 | 108k | { | 224 | 108k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 225 | 108k | } |
h264dsp.c:h264_h_loop_filter_luma_intra_14_c Line | Count | Source | 223 | 208k | { | 224 | 208k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 225 | 208k | } |
h264dsp.c:h264_h_loop_filter_luma_intra_8_c Line | Count | Source | 223 | 1.36M | { | 224 | 1.36M | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 225 | 1.36M | } |
|
226 | | static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
227 | 284k | { |
228 | 284k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); |
229 | 284k | } h264dsp.c:h264_h_loop_filter_luma_mbaff_intra_9_c Line | Count | Source | 227 | 70.6k | { | 228 | 70.6k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 229 | 70.6k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_intra_10_c Line | Count | Source | 227 | 139k | { | 228 | 139k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 229 | 139k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_intra_12_c Line | Count | Source | 227 | 32.5k | { | 228 | 32.5k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 229 | 32.5k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_intra_14_c Line | Count | Source | 227 | 17.2k | { | 228 | 17.2k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 229 | 17.2k | } |
h264dsp.c:h264_h_loop_filter_luma_mbaff_intra_8_c Line | Count | Source | 227 | 25.1k | { | 228 | 25.1k | FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 229 | 25.1k | } |
|
230 | | |
231 | | static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta, int8_t *tc0) |
232 | 55.7M | { |
233 | 55.7M | pixel *pix = (pixel*)p_pix; |
234 | 55.7M | int i, d; |
235 | 55.7M | alpha <<= BIT_DEPTH - 8; |
236 | 55.7M | beta <<= BIT_DEPTH - 8; |
237 | 55.7M | xstride >>= sizeof(pixel)-1; |
238 | 55.7M | ystride >>= sizeof(pixel)-1; |
239 | 278M | for( i = 0; i < 4; i++ ) { |
240 | 223M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; |
241 | 223M | if( tc <= 0 ) { |
242 | 23.2M | pix += inner_iters*ystride; |
243 | 23.2M | continue; |
244 | 23.2M | } |
245 | 651M | for( d = 0; d < inner_iters; d++ ) { |
246 | 451M | const int p0 = pix[-1*xstride]; |
247 | 451M | const int p1 = pix[-2*xstride]; |
248 | 451M | const int q0 = pix[0]; |
249 | 451M | const int q1 = pix[1*xstride]; |
250 | | |
251 | 451M | if( FFABS( p0 - q0 ) < alpha && |
252 | 434M | FFABS( p1 - p0 ) < beta && |
253 | 432M | FFABS( q1 - q0 ) < beta ) { |
254 | | |
255 | 431M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); |
256 | | |
257 | 431M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ |
258 | 431M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ |
259 | 431M | } |
260 | 451M | pix += ystride; |
261 | 451M | } |
262 | 199M | } |
263 | 55.7M | } h264dsp.c:h264_loop_filter_chroma_9_c Line | Count | Source | 232 | 10.5M | { | 233 | 10.5M | pixel *pix = (pixel*)p_pix; | 234 | 10.5M | int i, d; | 235 | 10.5M | alpha <<= BIT_DEPTH - 8; | 236 | 10.5M | beta <<= BIT_DEPTH - 8; | 237 | 10.5M | xstride >>= sizeof(pixel)-1; | 238 | 10.5M | ystride >>= sizeof(pixel)-1; | 239 | 52.8M | for( i = 0; i < 4; i++ ) { | 240 | 42.2M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; | 241 | 42.2M | if( tc <= 0 ) { | 242 | 4.82M | pix += inner_iters*ystride; | 243 | 4.82M | continue; | 244 | 4.82M | } | 245 | 121M | for( d = 0; d < inner_iters; d++ ) { | 246 | 84.4M | const int p0 = pix[-1*xstride]; | 247 | 84.4M | const int p1 = pix[-2*xstride]; | 248 | 84.4M | const int q0 = pix[0]; | 249 | 84.4M | const int q1 = pix[1*xstride]; | 250 | | | 251 | 84.4M | if( FFABS( p0 - q0 ) < alpha && | 252 | 79.8M | FFABS( p1 - p0 ) < beta && | 253 | 79.3M | FFABS( q1 - q0 ) < beta ) { | 254 | | | 255 | 79.1M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); | 256 | | | 257 | 79.1M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ | 258 | 79.1M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ | 259 | 79.1M | } | 260 | 84.4M | pix += ystride; | 261 | 84.4M | } | 262 | 37.4M | } | 263 | 10.5M | } |
h264dsp.c:h264_loop_filter_chroma_10_c Line | Count | Source | 232 | 11.5M | { | 233 | 11.5M | pixel *pix = (pixel*)p_pix; | 234 | 11.5M | int i, d; | 235 | 11.5M | alpha <<= BIT_DEPTH - 8; | 236 | 11.5M | beta <<= BIT_DEPTH - 8; | 237 | 11.5M | xstride >>= sizeof(pixel)-1; | 238 | 11.5M | ystride >>= sizeof(pixel)-1; | 239 | 57.5M | for( i = 0; i < 4; i++ ) { | 240 | 46.0M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; | 241 | 46.0M | if( tc <= 0 ) { | 242 | 4.16M | pix += inner_iters*ystride; | 243 | 4.16M | continue; | 244 | 4.16M | } | 245 | 131M | for( d = 0; d < inner_iters; d++ ) { | 246 | 89.4M | const int p0 = pix[-1*xstride]; | 247 | 89.4M | const int p1 = pix[-2*xstride]; | 248 | 89.4M | const int q0 = pix[0]; | 249 | 89.4M | const int q1 = pix[1*xstride]; | 250 | | | 251 | 89.4M | if( FFABS( p0 - q0 ) < alpha && | 252 | 85.2M | FFABS( p1 - p0 ) < beta && | 253 | 84.6M | FFABS( q1 - q0 ) < beta ) { | 254 | | | 255 | 84.2M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); | 256 | | | 257 | 84.2M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ | 258 | 84.2M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ | 259 | 84.2M | } | 260 | 89.4M | pix += ystride; | 261 | 89.4M | } | 262 | 41.8M | } | 263 | 11.5M | } |
h264dsp.c:h264_loop_filter_chroma_12_c Line | Count | Source | 232 | 9.65M | { | 233 | 9.65M | pixel *pix = (pixel*)p_pix; | 234 | 9.65M | int i, d; | 235 | 9.65M | alpha <<= BIT_DEPTH - 8; | 236 | 9.65M | beta <<= BIT_DEPTH - 8; | 237 | 9.65M | xstride >>= sizeof(pixel)-1; | 238 | 9.65M | ystride >>= sizeof(pixel)-1; | 239 | 48.2M | for( i = 0; i < 4; i++ ) { | 240 | 38.6M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; | 241 | 38.6M | if( tc <= 0 ) { | 242 | 4.76M | pix += inner_iters*ystride; | 243 | 4.76M | continue; | 244 | 4.76M | } | 245 | 116M | for( d = 0; d < inner_iters; d++ ) { | 246 | 83.1M | const int p0 = pix[-1*xstride]; | 247 | 83.1M | const int p1 = pix[-2*xstride]; | 248 | 83.1M | const int q0 = pix[0]; | 249 | 83.1M | const int q1 = pix[1*xstride]; | 250 | | | 251 | 83.1M | if( FFABS( p0 - q0 ) < alpha && | 252 | 80.2M | FFABS( p1 - p0 ) < beta && | 253 | 79.9M | FFABS( q1 - q0 ) < beta ) { | 254 | | | 255 | 79.6M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); | 256 | | | 257 | 79.6M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ | 258 | 79.6M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ | 259 | 79.6M | } | 260 | 83.1M | pix += ystride; | 261 | 83.1M | } | 262 | 33.8M | } | 263 | 9.65M | } |
h264dsp.c:h264_loop_filter_chroma_14_c Line | Count | Source | 232 | 7.05M | { | 233 | 7.05M | pixel *pix = (pixel*)p_pix; | 234 | 7.05M | int i, d; | 235 | 7.05M | alpha <<= BIT_DEPTH - 8; | 236 | 7.05M | beta <<= BIT_DEPTH - 8; | 237 | 7.05M | xstride >>= sizeof(pixel)-1; | 238 | 7.05M | ystride >>= sizeof(pixel)-1; | 239 | 35.2M | for( i = 0; i < 4; i++ ) { | 240 | 28.2M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; | 241 | 28.2M | if( tc <= 0 ) { | 242 | 2.36M | pix += inner_iters*ystride; | 243 | 2.36M | continue; | 244 | 2.36M | } | 245 | 86.8M | for( d = 0; d < inner_iters; d++ ) { | 246 | 60.9M | const int p0 = pix[-1*xstride]; | 247 | 60.9M | const int p1 = pix[-2*xstride]; | 248 | 60.9M | const int q0 = pix[0]; | 249 | 60.9M | const int q1 = pix[1*xstride]; | 250 | | | 251 | 60.9M | if( FFABS( p0 - q0 ) < alpha && | 252 | 60.0M | FFABS( p1 - p0 ) < beta && | 253 | 59.9M | FFABS( q1 - q0 ) < beta ) { | 254 | | | 255 | 59.8M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); | 256 | | | 257 | 59.8M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ | 258 | 59.8M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ | 259 | 59.8M | } | 260 | 60.9M | pix += ystride; | 261 | 60.9M | } | 262 | 25.8M | } | 263 | 7.05M | } |
h264dsp.c:h264_loop_filter_chroma_8_c Line | Count | Source | 232 | 16.9M | { | 233 | 16.9M | pixel *pix = (pixel*)p_pix; | 234 | 16.9M | int i, d; | 235 | 16.9M | alpha <<= BIT_DEPTH - 8; | 236 | 16.9M | beta <<= BIT_DEPTH - 8; | 237 | 16.9M | xstride >>= sizeof(pixel)-1; | 238 | 16.9M | ystride >>= sizeof(pixel)-1; | 239 | 84.9M | for( i = 0; i < 4; i++ ) { | 240 | 67.9M | const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1; | 241 | 67.9M | if( tc <= 0 ) { | 242 | 7.13M | pix += inner_iters*ystride; | 243 | 7.13M | continue; | 244 | 7.13M | } | 245 | 194M | for( d = 0; d < inner_iters; d++ ) { | 246 | 133M | const int p0 = pix[-1*xstride]; | 247 | 133M | const int p1 = pix[-2*xstride]; | 248 | 133M | const int q0 = pix[0]; | 249 | 133M | const int q1 = pix[1*xstride]; | 250 | | | 251 | 133M | if( FFABS( p0 - q0 ) < alpha && | 252 | 129M | FFABS( p1 - p0 ) < beta && | 253 | 128M | FFABS( q1 - q0 ) < beta ) { | 254 | | | 255 | 128M | int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc ); | 256 | | | 257 | 128M | pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ | 258 | 128M | pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ | 259 | 128M | } | 260 | 133M | pix += ystride; | 261 | 133M | } | 262 | 60.8M | } | 263 | 16.9M | } |
|
264 | | static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
265 | 29.7M | { |
266 | 29.7M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); |
267 | 29.7M | } h264dsp.c:h264_v_loop_filter_chroma_9_c Line | Count | Source | 265 | 5.59M | { | 266 | 5.59M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); | 267 | 5.59M | } |
h264dsp.c:h264_v_loop_filter_chroma_10_c Line | Count | Source | 265 | 6.12M | { | 266 | 6.12M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); | 267 | 6.12M | } |
h264dsp.c:h264_v_loop_filter_chroma_12_c Line | Count | Source | 265 | 5.03M | { | 266 | 5.03M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); | 267 | 5.03M | } |
h264dsp.c:h264_v_loop_filter_chroma_14_c Line | Count | Source | 265 | 3.88M | { | 266 | 3.88M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); | 267 | 3.88M | } |
h264dsp.c:h264_v_loop_filter_chroma_8_c Line | Count | Source | 265 | 9.11M | { | 266 | 9.11M | FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); | 267 | 9.11M | } |
|
268 | | static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
269 | 13.6M | { |
270 | 13.6M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); |
271 | 13.6M | } h264dsp.c:h264_h_loop_filter_chroma_9_c Line | Count | Source | 269 | 2.86M | { | 270 | 2.86M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 271 | 2.86M | } |
h264dsp.c:h264_h_loop_filter_chroma_10_c Line | Count | Source | 269 | 3.02M | { | 270 | 3.02M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 271 | 3.02M | } |
h264dsp.c:h264_h_loop_filter_chroma_12_c Line | Count | Source | 269 | 1.30M | { | 270 | 1.30M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 271 | 1.30M | } |
h264dsp.c:h264_h_loop_filter_chroma_14_c Line | Count | Source | 269 | 1.02M | { | 270 | 1.02M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 271 | 1.02M | } |
h264dsp.c:h264_h_loop_filter_chroma_8_c Line | Count | Source | 269 | 5.47M | { | 270 | 5.47M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 271 | 5.47M | } |
|
272 | | static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
273 | 1.38M | { |
274 | 1.38M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); |
275 | 1.38M | } h264dsp.c:h264_h_loop_filter_chroma_mbaff_9_c Line | Count | Source | 273 | 138k | { | 274 | 138k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | 275 | 138k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_10_c Line | Count | Source | 273 | 531k | { | 274 | 531k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | 275 | 531k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_12_c Line | Count | Source | 273 | 94.4k | { | 274 | 94.4k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | 275 | 94.4k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_14_c Line | Count | Source | 273 | 269k | { | 274 | 269k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | 275 | 269k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_8_c Line | Count | Source | 273 | 347k | { | 274 | 347k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | 275 | 347k | } |
|
276 | | static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
277 | 8.29M | { |
278 | 8.29M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); |
279 | 8.29M | } h264dsp.c:h264_h_loop_filter_chroma422_9_c Line | Count | Source | 277 | 1.47M | { | 278 | 1.47M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 279 | 1.47M | } |
h264dsp.c:h264_h_loop_filter_chroma422_10_c Line | Count | Source | 277 | 1.08M | { | 278 | 1.08M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 279 | 1.08M | } |
h264dsp.c:h264_h_loop_filter_chroma422_12_c Line | Count | Source | 277 | 2.30M | { | 278 | 2.30M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 279 | 2.30M | } |
h264dsp.c:h264_h_loop_filter_chroma422_14_c Line | Count | Source | 277 | 1.44M | { | 278 | 1.44M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 279 | 1.44M | } |
h264dsp.c:h264_h_loop_filter_chroma422_8_c Line | Count | Source | 277 | 1.99M | { | 278 | 1.99M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | 279 | 1.99M | } |
|
280 | | static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) |
281 | 2.64M | { |
282 | 2.64M | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); |
283 | 2.64M | } h264dsp.c:h264_h_loop_filter_chroma422_mbaff_9_c Line | Count | Source | 281 | 499k | { | 282 | 499k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 283 | 499k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_10_c Line | Count | Source | 281 | 751k | { | 282 | 751k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 283 | 751k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_12_c Line | Count | Source | 281 | 919k | { | 282 | 919k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 283 | 919k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_14_c Line | Count | Source | 281 | 424k | { | 282 | 424k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 283 | 424k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_8_c Line | Count | Source | 281 | 49.4k | { | 282 | 49.4k | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | 283 | 49.4k | } |
|
284 | | |
285 | | static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta) |
286 | 7.92M | { |
287 | 7.92M | pixel *pix = (pixel*)p_pix; |
288 | 7.92M | int d; |
289 | 7.92M | xstride >>= sizeof(pixel)-1; |
290 | 7.92M | ystride >>= sizeof(pixel)-1; |
291 | 7.92M | alpha <<= BIT_DEPTH - 8; |
292 | 7.92M | beta <<= BIT_DEPTH - 8; |
293 | 77.0M | for( d = 0; d < 4 * inner_iters; d++ ) { |
294 | 69.0M | const int p0 = pix[-1*xstride]; |
295 | 69.0M | const int p1 = pix[-2*xstride]; |
296 | 69.0M | const int q0 = pix[0]; |
297 | 69.0M | const int q1 = pix[1*xstride]; |
298 | | |
299 | 69.0M | if( FFABS( p0 - q0 ) < alpha && |
300 | 66.6M | FFABS( p1 - p0 ) < beta && |
301 | 66.3M | FFABS( q1 - q0 ) < beta ) { |
302 | | |
303 | 66.1M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ |
304 | 66.1M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ |
305 | 66.1M | } |
306 | 69.0M | pix += ystride; |
307 | 69.0M | } |
308 | 7.92M | } h264dsp.c:h264_loop_filter_chroma_intra_9_c Line | Count | Source | 286 | 1.22M | { | 287 | 1.22M | pixel *pix = (pixel*)p_pix; | 288 | 1.22M | int d; | 289 | 1.22M | xstride >>= sizeof(pixel)-1; | 290 | 1.22M | ystride >>= sizeof(pixel)-1; | 291 | 1.22M | alpha <<= BIT_DEPTH - 8; | 292 | 1.22M | beta <<= BIT_DEPTH - 8; | 293 | 12.6M | for( d = 0; d < 4 * inner_iters; d++ ) { | 294 | 11.4M | const int p0 = pix[-1*xstride]; | 295 | 11.4M | const int p1 = pix[-2*xstride]; | 296 | 11.4M | const int q0 = pix[0]; | 297 | 11.4M | const int q1 = pix[1*xstride]; | 298 | | | 299 | 11.4M | if( FFABS( p0 - q0 ) < alpha && | 300 | 10.5M | FFABS( p1 - p0 ) < beta && | 301 | 10.5M | FFABS( q1 - q0 ) < beta ) { | 302 | | | 303 | 10.5M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | 304 | 10.5M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | 305 | 10.5M | } | 306 | 11.4M | pix += ystride; | 307 | 11.4M | } | 308 | 1.22M | } |
h264dsp.c:h264_loop_filter_chroma_intra_10_c Line | Count | Source | 286 | 960k | { | 287 | 960k | pixel *pix = (pixel*)p_pix; | 288 | 960k | int d; | 289 | 960k | xstride >>= sizeof(pixel)-1; | 290 | 960k | ystride >>= sizeof(pixel)-1; | 291 | 960k | alpha <<= BIT_DEPTH - 8; | 292 | 960k | beta <<= BIT_DEPTH - 8; | 293 | 10.5M | for( d = 0; d < 4 * inner_iters; d++ ) { | 294 | 9.60M | const int p0 = pix[-1*xstride]; | 295 | 9.60M | const int p1 = pix[-2*xstride]; | 296 | 9.60M | const int q0 = pix[0]; | 297 | 9.60M | const int q1 = pix[1*xstride]; | 298 | | | 299 | 9.60M | if( FFABS( p0 - q0 ) < alpha && | 300 | 9.35M | FFABS( p1 - p0 ) < beta && | 301 | 9.31M | FFABS( q1 - q0 ) < beta ) { | 302 | | | 303 | 9.28M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | 304 | 9.28M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | 305 | 9.28M | } | 306 | 9.60M | pix += ystride; | 307 | 9.60M | } | 308 | 960k | } |
h264dsp.c:h264_loop_filter_chroma_intra_12_c Line | Count | Source | 286 | 407k | { | 287 | 407k | pixel *pix = (pixel*)p_pix; | 288 | 407k | int d; | 289 | 407k | xstride >>= sizeof(pixel)-1; | 290 | 407k | ystride >>= sizeof(pixel)-1; | 291 | 407k | alpha <<= BIT_DEPTH - 8; | 292 | 407k | beta <<= BIT_DEPTH - 8; | 293 | 4.48M | for( d = 0; d < 4 * inner_iters; d++ ) { | 294 | 4.07M | const int p0 = pix[-1*xstride]; | 295 | 4.07M | const int p1 = pix[-2*xstride]; | 296 | 4.07M | const int q0 = pix[0]; | 297 | 4.07M | const int q1 = pix[1*xstride]; | 298 | | | 299 | 4.07M | if( FFABS( p0 - q0 ) < alpha && | 300 | 3.69M | FFABS( p1 - p0 ) < beta && | 301 | 3.67M | FFABS( q1 - q0 ) < beta ) { | 302 | | | 303 | 3.61M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | 304 | 3.61M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | 305 | 3.61M | } | 306 | 4.07M | pix += ystride; | 307 | 4.07M | } | 308 | 407k | } |
h264dsp.c:h264_loop_filter_chroma_intra_14_c Line | Count | Source | 286 | 603k | { | 287 | 603k | pixel *pix = (pixel*)p_pix; | 288 | 603k | int d; | 289 | 603k | xstride >>= sizeof(pixel)-1; | 290 | 603k | ystride >>= sizeof(pixel)-1; | 291 | 603k | alpha <<= BIT_DEPTH - 8; | 292 | 603k | beta <<= BIT_DEPTH - 8; | 293 | 5.88M | for( d = 0; d < 4 * inner_iters; d++ ) { | 294 | 5.28M | const int p0 = pix[-1*xstride]; | 295 | 5.28M | const int p1 = pix[-2*xstride]; | 296 | 5.28M | const int q0 = pix[0]; | 297 | 5.28M | const int q1 = pix[1*xstride]; | 298 | | | 299 | 5.28M | if( FFABS( p0 - q0 ) < alpha && | 300 | 5.19M | FFABS( p1 - p0 ) < beta && | 301 | 5.19M | FFABS( q1 - q0 ) < beta ) { | 302 | | | 303 | 5.18M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | 304 | 5.18M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | 305 | 5.18M | } | 306 | 5.28M | pix += ystride; | 307 | 5.28M | } | 308 | 603k | } |
h264dsp.c:h264_loop_filter_chroma_intra_8_c Line | Count | Source | 286 | 4.73M | { | 287 | 4.73M | pixel *pix = (pixel*)p_pix; | 288 | 4.73M | int d; | 289 | 4.73M | xstride >>= sizeof(pixel)-1; | 290 | 4.73M | ystride >>= sizeof(pixel)-1; | 291 | 4.73M | alpha <<= BIT_DEPTH - 8; | 292 | 4.73M | beta <<= BIT_DEPTH - 8; | 293 | 43.3M | for( d = 0; d < 4 * inner_iters; d++ ) { | 294 | 38.6M | const int p0 = pix[-1*xstride]; | 295 | 38.6M | const int p1 = pix[-2*xstride]; | 296 | 38.6M | const int q0 = pix[0]; | 297 | 38.6M | const int q1 = pix[1*xstride]; | 298 | | | 299 | 38.6M | if( FFABS( p0 - q0 ) < alpha && | 300 | 37.7M | FFABS( p1 - p0 ) < beta && | 301 | 37.6M | FFABS( q1 - q0 ) < beta ) { | 302 | | | 303 | 37.5M | pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | 304 | 37.5M | pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | 305 | 37.5M | } | 306 | 38.6M | pix += ystride; | 307 | 38.6M | } | 308 | 4.73M | } |
|
/**
 * "v" variant of the intra chroma loop filter.
 *
 * Thin wrapper: forwards to FUNCC(h264_loop_filter_chroma_intra) as
 * (pix, stride, sizeof(pixel), 2, alpha, beta), i.e. the caller's stride goes
 * in the helper's second slot and the size of one pixel in the third.
 * NOTE(review): the helper's signature is not visible in this chunk;
 * presumably those slots are xstride / ystride / inner_iters, which would
 * mean filter taps are taken `stride` apart while advancing one pixel per
 * step, for 4 * 2 = 8 positions -- confirm against the helper's definition.
 *
 * @param pix    pointer into the sample buffer, forwarded unchanged
 * @param stride stride value, forwarded unchanged
 * @param alpha  threshold forwarded unchanged to the helper
 * @param beta   threshold forwarded unchanged to the helper
 */
309 | | static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
310 | 3.39M | { |
311 | 3.39M | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); |
312 | 3.39M | } h264dsp.c:h264_v_loop_filter_chroma_intra_9_c Line | Count | Source | 310 | 490k | { | 311 | 490k | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); | 312 | 490k | } |
h264dsp.c:h264_v_loop_filter_chroma_intra_10_c Line | Count | Source | 310 | 303k | { | 311 | 303k | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); | 312 | 303k | } |
h264dsp.c:h264_v_loop_filter_chroma_intra_12_c Line | Count | Source | 310 | 150k | { | 311 | 150k | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); | 312 | 150k | } |
h264dsp.c:h264_v_loop_filter_chroma_intra_14_c Line | Count | Source | 310 | 254k | { | 311 | 254k | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); | 312 | 254k | } |
h264dsp.c:h264_v_loop_filter_chroma_intra_8_c Line | Count | Source | 310 | 2.19M | { | 311 | 2.19M | FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); | 312 | 2.19M | } |
|
/**
 * "h" variant of the intra chroma loop filter.
 *
 * Thin wrapper: forwards to FUNCC(h264_loop_filter_chroma_intra) as
 * (pix, sizeof(pixel), stride, 2, alpha, beta), i.e. the size of one pixel
 * goes in the helper's second slot and the caller's stride in the third.
 * NOTE(review): the helper's signature is not visible in this chunk;
 * presumably those slots are xstride / ystride / inner_iters, which would
 * mean filter taps are adjacent pixels while advancing by `stride` per step,
 * for 4 * 2 = 8 positions -- confirm against the helper's definition.
 *
 * @param pix    pointer into the sample buffer, forwarded unchanged
 * @param stride stride value, forwarded unchanged
 * @param alpha  threshold forwarded unchanged to the helper
 * @param beta   threshold forwarded unchanged to the helper
 */
313 | | static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
314 | 3.23M | { |
315 | 3.23M | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); |
316 | 3.23M | } h264dsp.c:h264_h_loop_filter_chroma_intra_9_c Line | Count | Source | 314 | 361k | { | 315 | 361k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 316 | 361k | } |
h264dsp.c:h264_h_loop_filter_chroma_intra_10_c Line | Count | Source | 314 | 155k | { | 315 | 155k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 316 | 155k | } |
h264dsp.c:h264_h_loop_filter_chroma_intra_12_c Line | Count | Source | 314 | 73.5k | { | 315 | 73.5k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 316 | 73.5k | } |
h264dsp.c:h264_h_loop_filter_chroma_intra_14_c Line | Count | Source | 314 | 269k | { | 315 | 269k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 316 | 269k | } |
h264dsp.c:h264_h_loop_filter_chroma_intra_8_c Line | Count | Source | 314 | 2.37M | { | 315 | 2.37M | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 316 | 2.37M | } |
|
/**
 * "h" MBAFF variant of the intra chroma loop filter.
 *
 * Thin wrapper: forwards to FUNCC(h264_loop_filter_chroma_intra) as
 * (pix, sizeof(pixel), stride, 1, alpha, beta). The only difference from the
 * non-MBAFF "h" wrapper's call is the fourth argument, 1 instead of 2.
 * NOTE(review): the helper's signature is not visible in this chunk;
 * presumably the fourth slot is inner_iters, which would give 4 * 1 = 4
 * filtered positions -- confirm against the helper's definition.
 *
 * @param pix    pointer into the sample buffer, forwarded unchanged
 * @param stride stride value, forwarded unchanged
 * @param alpha  threshold forwarded unchanged to the helper
 * @param beta   threshold forwarded unchanged to the helper
 */
317 | | static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
318 | 177k | { |
319 | 177k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); |
320 | 177k | } h264dsp.c:h264_h_loop_filter_chroma_mbaff_intra_9_c Line | Count | Source | 318 | 49.9k | { | 319 | 49.9k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | 320 | 49.9k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_intra_10_c Line | Count | Source | 318 | 52.3k | { | 319 | 52.3k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | 320 | 52.3k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_intra_12_c Line | Count | Source | 318 | 36.1k | { | 319 | 36.1k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | 320 | 36.1k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_intra_14_c Line | Count | Source | 318 | 2.63k | { | 319 | 2.63k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | 320 | 2.63k | } |
h264dsp.c:h264_h_loop_filter_chroma_mbaff_intra_8_c Line | Count | Source | 318 | 36.2k | { | 319 | 36.2k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | 320 | 36.2k | } |
|
/**
 * "h" 4:2:2 variant of the intra chroma loop filter.
 *
 * Thin wrapper: forwards to FUNCC(h264_loop_filter_chroma_intra) as
 * (pix, sizeof(pixel), stride, 4, alpha, beta). The fourth argument is 4,
 * twice the value used by the plain "h" chroma intra wrapper.
 * NOTE(review): the helper's signature is not visible in this chunk;
 * presumably the fourth slot is inner_iters, which would give 4 * 4 = 16
 * filtered positions -- confirm against the helper's definition.
 *
 * @param pix    pointer into the sample buffer, forwarded unchanged
 * @param stride stride value, forwarded unchanged
 * @param alpha  threshold forwarded unchanged to the helper
 * @param beta   threshold forwarded unchanged to the helper
 */
321 | | static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
322 | 799k | { |
323 | 799k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); |
324 | 799k | } h264dsp.c:h264_h_loop_filter_chroma422_intra_9_c Line | Count | Source | 322 | 235k | { | 323 | 235k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 324 | 235k | } |
h264dsp.c:h264_h_loop_filter_chroma422_intra_10_c Line | Count | Source | 322 | 266k | { | 323 | 266k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 324 | 266k | } |
h264dsp.c:h264_h_loop_filter_chroma422_intra_12_c Line | Count | Source | 322 | 119k | { | 323 | 119k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 324 | 119k | } |
h264dsp.c:h264_h_loop_filter_chroma422_intra_14_c Line | Count | Source | 322 | 57.9k | { | 323 | 57.9k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 324 | 57.9k | } |
h264dsp.c:h264_h_loop_filter_chroma422_intra_8_c Line | Count | Source | 322 | 119k | { | 323 | 119k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | 324 | 119k | } |
|
/**
 * "h" 4:2:2 MBAFF variant of the intra chroma loop filter.
 *
 * Thin wrapper: forwards to FUNCC(h264_loop_filter_chroma_intra) as
 * (pix, sizeof(pixel), stride, 2, alpha, beta). The fourth argument is 2,
 * half the value used by the non-MBAFF 4:2:2 "h" wrapper.
 * NOTE(review): the helper's signature is not visible in this chunk;
 * presumably the fourth slot is inner_iters, which would give 4 * 2 = 8
 * filtered positions -- confirm against the helper's definition.
 *
 * @param pix    pointer into the sample buffer, forwarded unchanged
 * @param stride stride value, forwarded unchanged
 * @param alpha  threshold forwarded unchanged to the helper
 * @param beta   threshold forwarded unchanged to the helper
 */
325 | | static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) |
326 | 324k | { |
327 | 324k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); |
328 | 324k | } h264dsp.c:h264_h_loop_filter_chroma422_mbaff_intra_9_c Line | Count | Source | 326 | 83.1k | { | 327 | 83.1k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 328 | 83.1k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_intra_10_c Line | Count | Source | 326 | 183k | { | 327 | 183k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 328 | 183k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_intra_12_c Line | Count | Source | 326 | 28.6k | { | 327 | 28.6k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 328 | 28.6k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_intra_14_c Line | Count | Source | 326 | 18.9k | { | 327 | 18.9k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 328 | 18.9k | } |
h264dsp.c:h264_h_loop_filter_chroma422_mbaff_intra_8_c Line | Count | Source | 326 | 10.0k | { | 327 | 10.0k | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | 328 | 10.0k | } |
|