Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/h26x/h2656_inter_template.c
Line
Count
Source
1
/*
2
 * inter prediction template for HEVC/VVC
3
 *
4
 * Copyright (C) 2022 Nuo Mi
5
 * Copyright (C) 2024 Wu Jianhua
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23
24
11.4M
#define CHROMA_EXTRA_BEFORE     1
25
70.3M
#define CHROMA_EXTRA            3
26
5.07M
#define LUMA_EXTRA_BEFORE       3
27
51.6M
#define LUMA_EXTRA              7
28
29
static void FUNC(put_pixels)(int16_t *dst,
30
    const uint8_t *_src, const ptrdiff_t _src_stride,
31
    const int height, const int8_t *hf, const int8_t *vf, const int width)
32
9.41M
{
33
9.41M
    const pixel *src            = (const pixel *)_src;
34
9.41M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
112M
    for (int y = 0; y < height; y++) {
37
1.92G
        for (int x = 0; x < width; x++)
38
1.81G
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
102M
        src += src_stride;
40
102M
        dst += MAX_PB_SIZE;
41
102M
    }
42
9.41M
}
dsp.c:put_pixels_9
Line
Count
Source
32
496k
{
33
496k
    const pixel *src            = (const pixel *)_src;
34
496k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
11.2M
    for (int y = 0; y < height; y++) {
37
233M
        for (int x = 0; x < width; x++)
38
222M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
10.7M
        src += src_stride;
40
10.7M
        dst += MAX_PB_SIZE;
41
10.7M
    }
42
496k
}
dsp.c:put_pixels_10
Line
Count
Source
32
351k
{
33
351k
    const pixel *src            = (const pixel *)_src;
34
351k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
4.57M
    for (int y = 0; y < height; y++) {
37
84.1M
        for (int x = 0; x < width; x++)
38
79.9M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
4.22M
        src += src_stride;
40
4.22M
        dst += MAX_PB_SIZE;
41
4.22M
    }
42
351k
}
dsp.c:put_pixels_12
Line
Count
Source
32
2.31M
{
33
2.31M
    const pixel *src            = (const pixel *)_src;
34
2.31M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
25.4M
    for (int y = 0; y < height; y++) {
37
424M
        for (int x = 0; x < width; x++)
38
400M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
23.1M
        src += src_stride;
40
23.1M
        dst += MAX_PB_SIZE;
41
23.1M
    }
42
2.31M
}
dsp.c:put_pixels_8
Line
Count
Source
32
1.79M
{
33
1.79M
    const pixel *src            = (const pixel *)_src;
34
1.79M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
20.4M
    for (int y = 0; y < height; y++) {
37
336M
        for (int x = 0; x < width; x++)
38
317M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
18.6M
        src += src_stride;
40
18.6M
        dst += MAX_PB_SIZE;
41
18.6M
    }
42
1.79M
}
dsp.c:put_pixels_12
Line
Count
Source
32
2.31M
{
33
2.31M
    const pixel *src            = (const pixel *)_src;
34
2.31M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
25.4M
    for (int y = 0; y < height; y++) {
37
424M
        for (int x = 0; x < width; x++)
38
400M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
23.1M
        src += src_stride;
40
23.1M
        dst += MAX_PB_SIZE;
41
23.1M
    }
42
2.31M
}
dsp.c:put_pixels_10
Line
Count
Source
32
351k
{
33
351k
    const pixel *src            = (const pixel *)_src;
34
351k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
4.57M
    for (int y = 0; y < height; y++) {
37
84.1M
        for (int x = 0; x < width; x++)
38
79.9M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
4.22M
        src += src_stride;
40
4.22M
        dst += MAX_PB_SIZE;
41
4.22M
    }
42
351k
}
dsp.c:put_pixels_8
Line
Count
Source
32
1.79M
{
33
1.79M
    const pixel *src            = (const pixel *)_src;
34
1.79M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
35
36
20.4M
    for (int y = 0; y < height; y++) {
37
336M
        for (int x = 0; x < width; x++)
38
317M
            dst[x] = src[x] << (14 - BIT_DEPTH);
39
18.6M
        src += src_stride;
40
18.6M
        dst += MAX_PB_SIZE;
41
18.6M
    }
42
1.79M
}
43
44
static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride,
45
    const uint8_t *_src, const ptrdiff_t _src_stride, const int height,
46
     const int8_t *hf, const int8_t *vf, const int width)
47
2.13M
{
48
2.13M
    const pixel *src            = (const pixel *)_src;
49
2.13M
    pixel *dst                  = (pixel *)_dst;
50
2.13M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
51
2.13M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
52
53
18.6M
    for (int y = 0; y < height; y++) {
54
16.4M
        memcpy(dst, src, width * sizeof(pixel));
55
16.4M
        src += src_stride;
56
16.4M
        dst += dst_stride;
57
16.4M
    }
58
2.13M
}
dsp.c:put_uni_pixels_9
Line
Count
Source
47
161k
{
48
161k
    const pixel *src            = (const pixel *)_src;
49
161k
    pixel *dst                  = (pixel *)_dst;
50
161k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
51
161k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
52
53
1.35M
    for (int y = 0; y < height; y++) {
54
1.18M
        memcpy(dst, src, width * sizeof(pixel));
55
1.18M
        src += src_stride;
56
1.18M
        dst += dst_stride;
57
1.18M
    }
58
161k
}
dsp.c:put_uni_pixels_10
Line
Count
Source
47
95.6k
{
48
95.6k
    const pixel *src            = (const pixel *)_src;
49
95.6k
    pixel *dst                  = (pixel *)_dst;
50
95.6k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
51
95.6k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
52
53
792k
    for (int y = 0; y < height; y++) {
54
696k
        memcpy(dst, src, width * sizeof(pixel));
55
696k
        src += src_stride;
56
696k
        dst += dst_stride;
57
696k
    }
58
95.6k
}
dsp.c:put_uni_pixels_12
Line
Count
Source
47
1.14M
{
48
1.14M
    const pixel *src            = (const pixel *)_src;
49
1.14M
    pixel *dst                  = (pixel *)_dst;
50
1.14M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
51
1.14M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
52
53
10.1M
    for (int y = 0; y < height; y++) {
54
8.98M
        memcpy(dst, src, width * sizeof(pixel));
55
8.98M
        src += src_stride;
56
8.98M
        dst += dst_stride;
57
8.98M
    }
58
1.14M
}
dsp.c:put_uni_pixels_8
Line
Count
Source
47
733k
{
48
733k
    const pixel *src            = (const pixel *)_src;
49
733k
    pixel *dst                  = (pixel *)_dst;
50
733k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
51
733k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
52
53
6.32M
    for (int y = 0; y < height; y++) {
54
5.59M
        memcpy(dst, src, width * sizeof(pixel));
55
5.59M
        src += src_stride;
56
5.59M
        dst += dst_stride;
57
5.59M
    }
58
733k
}
59
60
static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride,
61
    const uint8_t *_src, const ptrdiff_t _src_stride, const int height,
62
    const int denom, const int wx, const int _ox,  const int8_t *hf, const int8_t *vf,
63
    const int width)
64
1.39M
{
65
1.39M
    const pixel *src            = (const pixel *)_src;
66
1.39M
    pixel *dst                  = (pixel *)_dst;
67
1.39M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
1.39M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
1.39M
    const int shift             = denom + 14 - BIT_DEPTH;
70
1.39M
#if BIT_DEPTH < 14
71
1.39M
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
1.39M
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
12.4M
    for (int y = 0; y < height; y++) {
78
164M
        for (int x = 0; x < width; x++) {
79
153M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
153M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
153M
        }
82
11.0M
        src += src_stride;
83
11.0M
        dst += dst_stride;
84
11.0M
    }
85
1.39M
}
dsp.c:put_uni_w_pixels_9
Line
Count
Source
64
54.0k
{
65
54.0k
    const pixel *src            = (const pixel *)_src;
66
54.0k
    pixel *dst                  = (pixel *)_dst;
67
54.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
54.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
54.0k
    const int shift             = denom + 14 - BIT_DEPTH;
70
54.0k
#if BIT_DEPTH < 14
71
54.0k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
54.0k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
927k
    for (int y = 0; y < height; y++) {
78
18.0M
        for (int x = 0; x < width; x++) {
79
17.1M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
17.1M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
17.1M
        }
82
873k
        src += src_stride;
83
873k
        dst += dst_stride;
84
873k
    }
85
54.0k
}
dsp.c:put_uni_w_pixels_10
Line
Count
Source
64
48.2k
{
65
48.2k
    const pixel *src            = (const pixel *)_src;
66
48.2k
    pixel *dst                  = (pixel *)_dst;
67
48.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
48.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
48.2k
    const int shift             = denom + 14 - BIT_DEPTH;
70
48.2k
#if BIT_DEPTH < 14
71
48.2k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
48.2k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
683k
    for (int y = 0; y < height; y++) {
78
13.4M
        for (int x = 0; x < width; x++) {
79
12.8M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
12.8M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
12.8M
        }
82
634k
        src += src_stride;
83
634k
        dst += dst_stride;
84
634k
    }
85
48.2k
}
dsp.c:put_uni_w_pixels_12
Line
Count
Source
64
385k
{
65
385k
    const pixel *src            = (const pixel *)_src;
66
385k
    pixel *dst                  = (pixel *)_dst;
67
385k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
385k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
385k
    const int shift             = denom + 14 - BIT_DEPTH;
70
385k
#if BIT_DEPTH < 14
71
385k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
385k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
3.32M
    for (int y = 0; y < height; y++) {
78
42.2M
        for (int x = 0; x < width; x++) {
79
39.2M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
39.2M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
39.2M
        }
82
2.94M
        src += src_stride;
83
2.94M
        dst += dst_stride;
84
2.94M
    }
85
385k
}
dsp.c:put_uni_w_pixels_8
Line
Count
Source
64
236k
{
65
236k
    const pixel *src            = (const pixel *)_src;
66
236k
    pixel *dst                  = (pixel *)_dst;
67
236k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
236k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
236k
    const int shift             = denom + 14 - BIT_DEPTH;
70
236k
#if BIT_DEPTH < 14
71
236k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
236k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
1.74M
    for (int y = 0; y < height; y++) {
78
17.6M
        for (int x = 0; x < width; x++) {
79
16.1M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
16.1M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
16.1M
        }
82
1.51M
        src += src_stride;
83
1.51M
        dst += dst_stride;
84
1.51M
    }
85
236k
}
dsp.c:put_uni_w_pixels_12
Line
Count
Source
64
385k
{
65
385k
    const pixel *src            = (const pixel *)_src;
66
385k
    pixel *dst                  = (pixel *)_dst;
67
385k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
385k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
385k
    const int shift             = denom + 14 - BIT_DEPTH;
70
385k
#if BIT_DEPTH < 14
71
385k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
385k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
3.32M
    for (int y = 0; y < height; y++) {
78
42.2M
        for (int x = 0; x < width; x++) {
79
39.2M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
39.2M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
39.2M
        }
82
2.94M
        src += src_stride;
83
2.94M
        dst += dst_stride;
84
2.94M
    }
85
385k
}
dsp.c:put_uni_w_pixels_10
Line
Count
Source
64
48.2k
{
65
48.2k
    const pixel *src            = (const pixel *)_src;
66
48.2k
    pixel *dst                  = (pixel *)_dst;
67
48.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
48.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
48.2k
    const int shift             = denom + 14 - BIT_DEPTH;
70
48.2k
#if BIT_DEPTH < 14
71
48.2k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
48.2k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
683k
    for (int y = 0; y < height; y++) {
78
13.4M
        for (int x = 0; x < width; x++) {
79
12.8M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
12.8M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
12.8M
        }
82
634k
        src += src_stride;
83
634k
        dst += dst_stride;
84
634k
    }
85
48.2k
}
dsp.c:put_uni_w_pixels_8
Line
Count
Source
64
236k
{
65
236k
    const pixel *src            = (const pixel *)_src;
66
236k
    pixel *dst                  = (pixel *)_dst;
67
236k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
68
236k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
69
236k
    const int shift             = denom + 14 - BIT_DEPTH;
70
236k
#if BIT_DEPTH < 14
71
236k
    const int offset            = 1 << (shift - 1);
72
#else
73
    const int offset            = 0;
74
#endif
75
236k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
76
77
1.74M
    for (int y = 0; y < height; y++) {
78
17.6M
        for (int x = 0; x < width; x++) {
79
16.1M
            const int v = (src[x] << (14 - BIT_DEPTH));
80
16.1M
            dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox);
81
16.1M
        }
82
1.51M
        src += src_stride;
83
1.51M
        dst += dst_stride;
84
1.51M
    }
85
236k
}
86
87
#define LUMA_FILTER(src, stride)                                               \
88
1.84G
    (filter[0] * src[x - 3 * stride] +                                         \
89
1.84G
     filter[1] * src[x - 2 * stride] +                                         \
90
1.84G
     filter[2] * src[x -     stride] +                                         \
91
1.84G
     filter[3] * src[x             ] +                                         \
92
1.84G
     filter[4] * src[x +     stride] +                                         \
93
1.84G
     filter[5] * src[x + 2 * stride] +                                         \
94
1.84G
     filter[6] * src[x + 3 * stride] +                                         \
95
1.84G
     filter[7] * src[x + 4 * stride])
96
97
static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
98
    const int height, const int8_t *hf, const int8_t *vf, const int width)
99
350k
{
100
350k
    const pixel *src           = (const pixel*)_src;
101
350k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
350k
    const int8_t *filter       = hf;
103
104
5.98M
    for (int y = 0; y < height; y++) {
105
149M
        for (int x = 0; x < width; x++)
106
144M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
5.63M
        src += src_stride;
108
5.63M
        dst += MAX_PB_SIZE;
109
5.63M
    }
110
350k
}
dsp.c:put_luma_h_9
Line
Count
Source
99
25.8k
{
100
25.8k
    const pixel *src           = (const pixel*)_src;
101
25.8k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
25.8k
    const int8_t *filter       = hf;
103
104
528k
    for (int y = 0; y < height; y++) {
105
13.8M
        for (int x = 0; x < width; x++)
106
13.3M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
502k
        src += src_stride;
108
502k
        dst += MAX_PB_SIZE;
109
502k
    }
110
25.8k
}
dsp.c:put_luma_h_10
Line
Count
Source
99
22.0k
{
100
22.0k
    const pixel *src           = (const pixel*)_src;
101
22.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
22.0k
    const int8_t *filter       = hf;
103
104
562k
    for (int y = 0; y < height; y++) {
105
19.5M
        for (int x = 0; x < width; x++)
106
18.9M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
540k
        src += src_stride;
108
540k
        dst += MAX_PB_SIZE;
109
540k
    }
110
22.0k
}
dsp.c:put_luma_h_12
Line
Count
Source
99
74.9k
{
100
74.9k
    const pixel *src           = (const pixel*)_src;
101
74.9k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
74.9k
    const int8_t *filter       = hf;
103
104
1.01M
    for (int y = 0; y < height; y++) {
105
21.4M
        for (int x = 0; x < width; x++)
106
20.4M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
939k
        src += src_stride;
108
939k
        dst += MAX_PB_SIZE;
109
939k
    }
110
74.9k
}
dsp.c:put_luma_h_8
Line
Count
Source
99
65.3k
{
100
65.3k
    const pixel *src           = (const pixel*)_src;
101
65.3k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
65.3k
    const int8_t *filter       = hf;
103
104
1.15M
    for (int y = 0; y < height; y++) {
105
27.1M
        for (int x = 0; x < width; x++)
106
26.0M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
1.08M
        src += src_stride;
108
1.08M
        dst += MAX_PB_SIZE;
109
1.08M
    }
110
65.3k
}
dsp.c:put_luma_h_12
Line
Count
Source
99
74.9k
{
100
74.9k
    const pixel *src           = (const pixel*)_src;
101
74.9k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
74.9k
    const int8_t *filter       = hf;
103
104
1.01M
    for (int y = 0; y < height; y++) {
105
21.4M
        for (int x = 0; x < width; x++)
106
20.4M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
939k
        src += src_stride;
108
939k
        dst += MAX_PB_SIZE;
109
939k
    }
110
74.9k
}
dsp.c:put_luma_h_10
Line
Count
Source
99
22.0k
{
100
22.0k
    const pixel *src           = (const pixel*)_src;
101
22.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
22.0k
    const int8_t *filter       = hf;
103
104
562k
    for (int y = 0; y < height; y++) {
105
19.5M
        for (int x = 0; x < width; x++)
106
18.9M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
540k
        src += src_stride;
108
540k
        dst += MAX_PB_SIZE;
109
540k
    }
110
22.0k
}
dsp.c:put_luma_h_8
Line
Count
Source
99
65.3k
{
100
65.3k
    const pixel *src           = (const pixel*)_src;
101
65.3k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
102
65.3k
    const int8_t *filter       = hf;
103
104
1.15M
    for (int y = 0; y < height; y++) {
105
27.1M
        for (int x = 0; x < width; x++)
106
26.0M
            dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
107
1.08M
        src += src_stride;
108
1.08M
        dst += MAX_PB_SIZE;
109
1.08M
    }
110
65.3k
}
111
112
static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
113
    const int height, const int8_t *hf, const int8_t *vf, const int width)
114
267k
{
115
267k
    const pixel *src           = (pixel*)_src;
116
267k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
267k
    const int8_t *filter       = vf;
118
119
4.76M
    for (int y = 0; y < height; y++)  {
120
126M
        for (int x = 0; x < width; x++)
121
122M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
4.50M
        src += src_stride;
123
4.50M
        dst += MAX_PB_SIZE;
124
4.50M
    }
125
267k
}
dsp.c:put_luma_v_9
Line
Count
Source
114
29.6k
{
115
29.6k
    const pixel *src           = (pixel*)_src;
116
29.6k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
29.6k
    const int8_t *filter       = vf;
118
119
738k
    for (int y = 0; y < height; y++)  {
120
22.4M
        for (int x = 0; x < width; x++)
121
21.7M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
708k
        src += src_stride;
123
708k
        dst += MAX_PB_SIZE;
124
708k
    }
125
29.6k
}
dsp.c:put_luma_v_10
Line
Count
Source
114
12.0k
{
115
12.0k
    const pixel *src           = (pixel*)_src;
116
12.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
12.0k
    const int8_t *filter       = vf;
118
119
258k
    for (int y = 0; y < height; y++)  {
120
8.90M
        for (int x = 0; x < width; x++)
121
8.65M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
246k
        src += src_stride;
123
246k
        dst += MAX_PB_SIZE;
124
246k
    }
125
12.0k
}
dsp.c:put_luma_v_12
Line
Count
Source
114
55.0k
{
115
55.0k
    const pixel *src           = (pixel*)_src;
116
55.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
55.0k
    const int8_t *filter       = vf;
118
119
810k
    for (int y = 0; y < height; y++)  {
120
18.7M
        for (int x = 0; x < width; x++)
121
18.0M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
755k
        src += src_stride;
123
755k
        dst += MAX_PB_SIZE;
124
755k
    }
125
55.0k
}
dsp.c:put_luma_v_8
Line
Count
Source
114
51.7k
{
115
51.7k
    const pixel *src           = (pixel*)_src;
116
51.7k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
51.7k
    const int8_t *filter       = vf;
118
119
945k
    for (int y = 0; y < height; y++)  {
120
24.4M
        for (int x = 0; x < width; x++)
121
23.5M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
893k
        src += src_stride;
123
893k
        dst += MAX_PB_SIZE;
124
893k
    }
125
51.7k
}
dsp.c:put_luma_v_12
Line
Count
Source
114
55.0k
{
115
55.0k
    const pixel *src           = (pixel*)_src;
116
55.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
55.0k
    const int8_t *filter       = vf;
118
119
810k
    for (int y = 0; y < height; y++)  {
120
18.7M
        for (int x = 0; x < width; x++)
121
18.0M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
755k
        src += src_stride;
123
755k
        dst += MAX_PB_SIZE;
124
755k
    }
125
55.0k
}
dsp.c:put_luma_v_10
Line
Count
Source
114
12.0k
{
115
12.0k
    const pixel *src           = (pixel*)_src;
116
12.0k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
12.0k
    const int8_t *filter       = vf;
118
119
258k
    for (int y = 0; y < height; y++)  {
120
8.90M
        for (int x = 0; x < width; x++)
121
8.65M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
246k
        src += src_stride;
123
246k
        dst += MAX_PB_SIZE;
124
246k
    }
125
12.0k
}
dsp.c:put_luma_v_8
Line
Count
Source
114
51.7k
{
115
51.7k
    const pixel *src           = (pixel*)_src;
116
51.7k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
117
51.7k
    const int8_t *filter       = vf;
118
119
945k
    for (int y = 0; y < height; y++)  {
120
24.4M
        for (int x = 0; x < width; x++)
121
23.5M
            dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
122
893k
        src += src_stride;
123
893k
        dst += MAX_PB_SIZE;
124
893k
    }
125
51.7k
}
126
127
static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
128
    const int height, const int8_t *hf, const int8_t *vf, const int width)
129
735k
{
130
735k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
735k
    int16_t *tmp                = tmp_array;
132
735k
    const pixel *src            = (const pixel*)_src;
133
735k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
735k
    const int8_t *filter        = hf;
135
136
735k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
17.5M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
386M
        for (int x = 0; x < width; x++)
139
369M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
16.7M
        src += src_stride;
141
16.7M
        tmp += MAX_PB_SIZE;
142
16.7M
    }
143
144
735k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
735k
    filter = vf;
146
12.3M
    for (int y = 0; y < height; y++) {
147
300M
        for (int x = 0; x < width; x++)
148
289M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
11.6M
        tmp += MAX_PB_SIZE;
150
11.6M
        dst += MAX_PB_SIZE;
151
11.6M
    }
152
735k
}
dsp.c:put_luma_hv_9
Line
Count
Source
129
67.6k
{
130
67.6k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
67.6k
    int16_t *tmp                = tmp_array;
132
67.6k
    const pixel *src            = (const pixel*)_src;
133
67.6k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
67.6k
    const int8_t *filter        = hf;
135
136
67.6k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
2.08M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
57.7M
        for (int x = 0; x < width; x++)
139
55.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
2.01M
        src += src_stride;
141
2.01M
        tmp += MAX_PB_SIZE;
142
2.01M
    }
143
144
67.6k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
67.6k
    filter = vf;
146
1.60M
    for (int y = 0; y < height; y++) {
147
46.0M
        for (int x = 0; x < width; x++)
148
44.4M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
1.53M
        tmp += MAX_PB_SIZE;
150
1.53M
        dst += MAX_PB_SIZE;
151
1.53M
    }
152
67.6k
}
dsp.c:put_luma_hv_10
Line
Count
Source
129
26.3k
{
130
26.3k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
26.3k
    int16_t *tmp                = tmp_array;
132
26.3k
    const pixel *src            = (const pixel*)_src;
133
26.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
26.3k
    const int8_t *filter        = hf;
135
136
26.3k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
775k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
23.4M
        for (int x = 0; x < width; x++)
139
22.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
748k
        src += src_stride;
141
748k
        tmp += MAX_PB_SIZE;
142
748k
    }
143
144
26.3k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
26.3k
    filter = vf;
146
590k
    for (int y = 0; y < height; y++) {
147
19.4M
        for (int x = 0; x < width; x++)
148
18.8M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
564k
        tmp += MAX_PB_SIZE;
150
564k
        dst += MAX_PB_SIZE;
151
564k
    }
152
26.3k
}
dsp.c:put_luma_hv_12
Line
Count
Source
129
152k
{
130
152k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
152k
    int16_t *tmp                = tmp_array;
132
152k
    const pixel *src            = (const pixel*)_src;
133
152k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
152k
    const int8_t *filter        = hf;
135
136
152k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
3.25M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
66.4M
        for (int x = 0; x < width; x++)
139
63.3M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
3.10M
        src += src_stride;
141
3.10M
        tmp += MAX_PB_SIZE;
142
3.10M
    }
143
144
152k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
152k
    filter = vf;
146
2.18M
    for (int y = 0; y < height; y++) {
147
51.2M
        for (int x = 0; x < width; x++)
148
49.2M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
2.03M
        tmp += MAX_PB_SIZE;
150
2.03M
        dst += MAX_PB_SIZE;
151
2.03M
    }
152
152k
}
dsp.c:put_luma_hv_8
Line
Count
Source
129
154k
{
130
154k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
154k
    int16_t *tmp                = tmp_array;
132
154k
    const pixel *src            = (const pixel*)_src;
133
154k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
154k
    const int8_t *filter        = hf;
135
136
154k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
3.68M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
74.2M
        for (int x = 0; x < width; x++)
139
70.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
3.53M
        src += src_stride;
141
3.53M
        tmp += MAX_PB_SIZE;
142
3.53M
    }
143
144
154k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
154k
    filter = vf;
146
2.60M
    for (int y = 0; y < height; y++) {
147
56.6M
        for (int x = 0; x < width; x++)
148
54.2M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
2.44M
        tmp += MAX_PB_SIZE;
150
2.44M
        dst += MAX_PB_SIZE;
151
2.44M
    }
152
154k
}
dsp.c:put_luma_hv_12
Line
Count
Source
129
152k
{
130
152k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
152k
    int16_t *tmp                = tmp_array;
132
152k
    const pixel *src            = (const pixel*)_src;
133
152k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
152k
    const int8_t *filter        = hf;
135
136
152k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
3.25M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
66.4M
        for (int x = 0; x < width; x++)
139
63.3M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
3.10M
        src += src_stride;
141
3.10M
        tmp += MAX_PB_SIZE;
142
3.10M
    }
143
144
152k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
152k
    filter = vf;
146
2.18M
    for (int y = 0; y < height; y++) {
147
51.2M
        for (int x = 0; x < width; x++)
148
49.2M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
2.03M
        tmp += MAX_PB_SIZE;
150
2.03M
        dst += MAX_PB_SIZE;
151
2.03M
    }
152
152k
}
dsp.c:put_luma_hv_10
Line
Count
Source
129
26.3k
{
130
26.3k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
26.3k
    int16_t *tmp                = tmp_array;
132
26.3k
    const pixel *src            = (const pixel*)_src;
133
26.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
26.3k
    const int8_t *filter        = hf;
135
136
26.3k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
775k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
23.4M
        for (int x = 0; x < width; x++)
139
22.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
748k
        src += src_stride;
141
748k
        tmp += MAX_PB_SIZE;
142
748k
    }
143
144
26.3k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
26.3k
    filter = vf;
146
590k
    for (int y = 0; y < height; y++) {
147
19.4M
        for (int x = 0; x < width; x++)
148
18.8M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
564k
        tmp += MAX_PB_SIZE;
150
564k
        dst += MAX_PB_SIZE;
151
564k
    }
152
26.3k
}
dsp.c:put_luma_hv_8
Line
Count
Source
129
154k
{
130
154k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
131
154k
    int16_t *tmp                = tmp_array;
132
154k
    const pixel *src            = (const pixel*)_src;
133
154k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
134
154k
    const int8_t *filter        = hf;
135
136
154k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
137
3.68M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
138
74.2M
        for (int x = 0; x < width; x++)
139
70.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
140
3.53M
        src += src_stride;
141
3.53M
        tmp += MAX_PB_SIZE;
142
3.53M
    }
143
144
154k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
145
154k
    filter = vf;
146
2.60M
    for (int y = 0; y < height; y++) {
147
56.6M
        for (int x = 0; x < width; x++)
148
54.2M
            dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
149
2.44M
        tmp += MAX_PB_SIZE;
150
2.44M
        dst += MAX_PB_SIZE;
151
2.44M
    }
152
154k
}
153
154
static void FUNC(put_uni_luma_h)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
155
    const uint8_t *_src, const ptrdiff_t _src_stride,
156
    const int height, const int8_t *hf, const int8_t *vf, const int width)
157
559k
{
158
559k
    const pixel *src           = (const pixel*)_src;
159
559k
    pixel *dst                 = (pixel *)_dst;
160
559k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
559k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
559k
    const int8_t *filter       = hf;
163
559k
    const int shift            = 14 - BIT_DEPTH;
164
559k
#if BIT_DEPTH < 14
165
559k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
7.15M
    for (int y = 0; y < height; y++) {
171
137M
        for (int x = 0; x < width; x++) {
172
130M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
130M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
130M
        }
175
6.59M
        src   += src_stride;
176
6.59M
        dst   += dst_stride;
177
6.59M
    }
178
559k
}
dsp.c:put_uni_luma_h_9
Line
Count
Source
157
13.7k
{
158
13.7k
    const pixel *src           = (const pixel*)_src;
159
13.7k
    pixel *dst                 = (pixel *)_dst;
160
13.7k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
13.7k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
13.7k
    const int8_t *filter       = hf;
163
13.7k
    const int shift            = 14 - BIT_DEPTH;
164
13.7k
#if BIT_DEPTH < 14
165
13.7k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
198k
    for (int y = 0; y < height; y++) {
171
4.29M
        for (int x = 0; x < width; x++) {
172
4.10M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
4.10M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
4.10M
        }
175
184k
        src   += src_stride;
176
184k
        dst   += dst_stride;
177
184k
    }
178
13.7k
}
dsp.c:put_uni_luma_h_10
Line
Count
Source
157
17.4k
{
158
17.4k
    const pixel *src           = (const pixel*)_src;
159
17.4k
    pixel *dst                 = (pixel *)_dst;
160
17.4k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
17.4k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
17.4k
    const int8_t *filter       = hf;
163
17.4k
    const int shift            = 14 - BIT_DEPTH;
164
17.4k
#if BIT_DEPTH < 14
165
17.4k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
225k
    for (int y = 0; y < height; y++) {
171
3.66M
        for (int x = 0; x < width; x++) {
172
3.45M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
3.45M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
3.45M
        }
175
207k
        src   += src_stride;
176
207k
        dst   += dst_stride;
177
207k
    }
178
17.4k
}
dsp.c:put_uni_luma_h_12
Line
Count
Source
157
96.6k
{
158
96.6k
    const pixel *src           = (const pixel*)_src;
159
96.6k
    pixel *dst                 = (pixel *)_dst;
160
96.6k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
96.6k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
96.6k
    const int8_t *filter       = hf;
163
96.6k
    const int shift            = 14 - BIT_DEPTH;
164
96.6k
#if BIT_DEPTH < 14
165
96.6k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
1.17M
    for (int y = 0; y < height; y++) {
171
22.2M
        for (int x = 0; x < width; x++) {
172
21.2M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
21.2M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
21.2M
        }
175
1.07M
        src   += src_stride;
176
1.07M
        dst   += dst_stride;
177
1.07M
    }
178
96.6k
}
dsp.c:put_uni_luma_h_8
Line
Count
Source
157
158k
{
158
158k
    const pixel *src           = (const pixel*)_src;
159
158k
    pixel *dst                 = (pixel *)_dst;
160
158k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
158k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
158k
    const int8_t *filter       = hf;
163
158k
    const int shift            = 14 - BIT_DEPTH;
164
158k
#if BIT_DEPTH < 14
165
158k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
2.08M
    for (int y = 0; y < height; y++) {
171
40.4M
        for (int x = 0; x < width; x++) {
172
38.4M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
38.4M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
38.4M
        }
175
1.92M
        src   += src_stride;
176
1.92M
        dst   += dst_stride;
177
1.92M
    }
178
158k
}
dsp.c:put_uni_luma_h_12
Line
Count
Source
157
96.6k
{
158
96.6k
    const pixel *src           = (const pixel*)_src;
159
96.6k
    pixel *dst                 = (pixel *)_dst;
160
96.6k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
96.6k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
96.6k
    const int8_t *filter       = hf;
163
96.6k
    const int shift            = 14 - BIT_DEPTH;
164
96.6k
#if BIT_DEPTH < 14
165
96.6k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
1.17M
    for (int y = 0; y < height; y++) {
171
22.2M
        for (int x = 0; x < width; x++) {
172
21.2M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
21.2M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
21.2M
        }
175
1.07M
        src   += src_stride;
176
1.07M
        dst   += dst_stride;
177
1.07M
    }
178
96.6k
}
dsp.c:put_uni_luma_h_10
Line
Count
Source
157
17.4k
{
158
17.4k
    const pixel *src           = (const pixel*)_src;
159
17.4k
    pixel *dst                 = (pixel *)_dst;
160
17.4k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
17.4k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
17.4k
    const int8_t *filter       = hf;
163
17.4k
    const int shift            = 14 - BIT_DEPTH;
164
17.4k
#if BIT_DEPTH < 14
165
17.4k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
225k
    for (int y = 0; y < height; y++) {
171
3.66M
        for (int x = 0; x < width; x++) {
172
3.45M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
3.45M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
3.45M
        }
175
207k
        src   += src_stride;
176
207k
        dst   += dst_stride;
177
207k
    }
178
17.4k
}
dsp.c:put_uni_luma_h_8
Line
Count
Source
157
158k
{
158
158k
    const pixel *src           = (const pixel*)_src;
159
158k
    pixel *dst                 = (pixel *)_dst;
160
158k
    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
161
158k
    const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
162
158k
    const int8_t *filter       = hf;
163
158k
    const int shift            = 14 - BIT_DEPTH;
164
158k
#if BIT_DEPTH < 14
165
158k
    const int offset           = 1 << (shift - 1);
166
#else
167
    const int offset           = 0;
168
#endif
169
170
2.08M
    for (int y = 0; y < height; y++) {
171
40.4M
        for (int x = 0; x < width; x++) {
172
38.4M
            const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
173
38.4M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
174
38.4M
        }
175
1.92M
        src   += src_stride;
176
1.92M
        dst   += dst_stride;
177
1.92M
    }
178
158k
}
179
180
static void FUNC(put_uni_luma_v)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
181
    const uint8_t *_src, const ptrdiff_t _src_stride,
182
    const int height, const int8_t *hf, const int8_t *vf, const int width)
183
572k
{
184
185
572k
    const pixel *src            = (const pixel*)_src;
186
572k
    pixel *dst                  = (pixel *)_dst;
187
572k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
572k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
572k
    const int8_t *filter        = vf;
190
572k
    const int shift             = 14 - BIT_DEPTH;
191
572k
#if BIT_DEPTH < 14
192
572k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
6.33M
    for (int y = 0; y < height; y++) {
198
98.5M
        for (int x = 0; x < width; x++) {
199
92.7M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
92.7M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
92.7M
        }
202
5.76M
        src   += src_stride;
203
5.76M
        dst   += dst_stride;
204
5.76M
    }
205
572k
}
dsp.c:put_uni_luma_v_9
Line
Count
Source
183
13.7k
{
184
185
13.7k
    const pixel *src            = (const pixel*)_src;
186
13.7k
    pixel *dst                  = (pixel *)_dst;
187
13.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
13.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
13.7k
    const int8_t *filter        = vf;
190
13.7k
    const int shift             = 14 - BIT_DEPTH;
191
13.7k
#if BIT_DEPTH < 14
192
13.7k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
162k
    for (int y = 0; y < height; y++) {
198
2.98M
        for (int x = 0; x < width; x++) {
199
2.83M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
2.83M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
2.83M
        }
202
148k
        src   += src_stride;
203
148k
        dst   += dst_stride;
204
148k
    }
205
13.7k
}
dsp.c:put_uni_luma_v_10
Line
Count
Source
183
33.8k
{
184
185
33.8k
    const pixel *src            = (const pixel*)_src;
186
33.8k
    pixel *dst                  = (pixel *)_dst;
187
33.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
33.8k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
33.8k
    const int8_t *filter        = vf;
190
33.8k
    const int shift             = 14 - BIT_DEPTH;
191
33.8k
#if BIT_DEPTH < 14
192
33.8k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
376k
    for (int y = 0; y < height; y++) {
198
5.70M
        for (int x = 0; x < width; x++) {
199
5.35M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
5.35M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
5.35M
        }
202
343k
        src   += src_stride;
203
343k
        dst   += dst_stride;
204
343k
    }
205
33.8k
}
dsp.c:put_uni_luma_v_12
Line
Count
Source
183
119k
{
184
185
119k
    const pixel *src            = (const pixel*)_src;
186
119k
    pixel *dst                  = (pixel *)_dst;
187
119k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
119k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
119k
    const int8_t *filter        = vf;
190
119k
    const int shift             = 14 - BIT_DEPTH;
191
119k
#if BIT_DEPTH < 14
192
119k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
1.16M
    for (int y = 0; y < height; y++) {
198
14.0M
        for (int x = 0; x < width; x++) {
199
12.9M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
12.9M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
12.9M
        }
202
1.04M
        src   += src_stride;
203
1.04M
        dst   += dst_stride;
204
1.04M
    }
205
119k
}
dsp.c:put_uni_luma_v_8
Line
Count
Source
183
126k
{
184
185
126k
    const pixel *src            = (const pixel*)_src;
186
126k
    pixel *dst                  = (pixel *)_dst;
187
126k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
126k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
126k
    const int8_t *filter        = vf;
190
126k
    const int shift             = 14 - BIT_DEPTH;
191
126k
#if BIT_DEPTH < 14
192
126k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
1.54M
    for (int y = 0; y < height; y++) {
198
28.0M
        for (int x = 0; x < width; x++) {
199
26.6M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
26.6M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
26.6M
        }
202
1.41M
        src   += src_stride;
203
1.41M
        dst   += dst_stride;
204
1.41M
    }
205
126k
}
dsp.c:put_uni_luma_v_12
Line
Count
Source
183
119k
{
184
185
119k
    const pixel *src            = (const pixel*)_src;
186
119k
    pixel *dst                  = (pixel *)_dst;
187
119k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
119k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
119k
    const int8_t *filter        = vf;
190
119k
    const int shift             = 14 - BIT_DEPTH;
191
119k
#if BIT_DEPTH < 14
192
119k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
1.16M
    for (int y = 0; y < height; y++) {
198
14.0M
        for (int x = 0; x < width; x++) {
199
12.9M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
12.9M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
12.9M
        }
202
1.04M
        src   += src_stride;
203
1.04M
        dst   += dst_stride;
204
1.04M
    }
205
119k
}
dsp.c:put_uni_luma_v_10
Line
Count
Source
183
33.8k
{
184
185
33.8k
    const pixel *src            = (const pixel*)_src;
186
33.8k
    pixel *dst                  = (pixel *)_dst;
187
33.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
33.8k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
33.8k
    const int8_t *filter        = vf;
190
33.8k
    const int shift             = 14 - BIT_DEPTH;
191
33.8k
#if BIT_DEPTH < 14
192
33.8k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
376k
    for (int y = 0; y < height; y++) {
198
5.70M
        for (int x = 0; x < width; x++) {
199
5.35M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
5.35M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
5.35M
        }
202
343k
        src   += src_stride;
203
343k
        dst   += dst_stride;
204
343k
    }
205
33.8k
}
dsp.c:put_uni_luma_v_8
Line
Count
Source
183
126k
{
184
185
126k
    const pixel *src            = (const pixel*)_src;
186
126k
    pixel *dst                  = (pixel *)_dst;
187
126k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
188
126k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
189
126k
    const int8_t *filter        = vf;
190
126k
    const int shift             = 14 - BIT_DEPTH;
191
126k
#if BIT_DEPTH < 14
192
126k
    const int offset            = 1 << (shift - 1);
193
#else
194
    const int offset            = 0;
195
#endif
196
197
1.54M
    for (int y = 0; y < height; y++) {
198
28.0M
        for (int x = 0; x < width; x++) {
199
26.6M
            const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
200
26.6M
            dst[x]        = av_clip_pixel((val + offset) >> shift);
201
26.6M
        }
202
1.41M
        src   += src_stride;
203
1.41M
        dst   += dst_stride;
204
1.41M
    }
205
126k
}
206
207
static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride,
208
    const uint8_t *_src, const ptrdiff_t _src_stride,
209
    const int height, const int8_t *hf, const int8_t *vf, const int width)
210
1.36M
{
211
1.36M
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
1.36M
    int16_t *tmp                = tmp_array;
213
1.36M
    const pixel *src            = (const pixel*)_src;
214
1.36M
    pixel *dst                  = (pixel *)_dst;
215
1.36M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
1.36M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
1.36M
    const int8_t *filter        = hf;
218
1.36M
    const int shift             =  14 - BIT_DEPTH;
219
1.36M
#if BIT_DEPTH < 14
220
1.36M
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
1.36M
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
25.1M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
347M
        for (int x = 0; x < width; x++)
228
323M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
23.7M
        src += src_stride;
230
23.7M
        tmp += MAX_PB_SIZE;
231
23.7M
    }
232
233
1.36M
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
1.36M
    filter = vf;
235
236
15.5M
    for (int y = 0; y < height; y++) {
237
244M
        for (int x = 0; x < width; x++) {
238
230M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
230M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
230M
        }
241
14.1M
        tmp += MAX_PB_SIZE;
242
14.1M
        dst += dst_stride;
243
14.1M
    }
244
245
1.36M
}
dsp.c:put_uni_luma_hv_9
Line
Count
Source
210
32.9k
{
211
32.9k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
32.9k
    int16_t *tmp                = tmp_array;
213
32.9k
    const pixel *src            = (const pixel*)_src;
214
32.9k
    pixel *dst                  = (pixel *)_dst;
215
32.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
32.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
32.9k
    const int8_t *filter        = hf;
218
32.9k
    const int shift             =  14 - BIT_DEPTH;
219
32.9k
#if BIT_DEPTH < 14
220
32.9k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
32.9k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
659k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
11.1M
        for (int x = 0; x < width; x++)
228
10.5M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
626k
        src += src_stride;
230
626k
        tmp += MAX_PB_SIZE;
231
626k
    }
232
233
32.9k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
32.9k
    filter = vf;
235
236
429k
    for (int y = 0; y < height; y++) {
237
8.09M
        for (int x = 0; x < width; x++) {
238
7.69M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
7.69M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
7.69M
        }
241
396k
        tmp += MAX_PB_SIZE;
242
396k
        dst += dst_stride;
243
396k
    }
244
245
32.9k
}
dsp.c:put_uni_luma_hv_10
Line
Count
Source
210
32.0k
{
211
32.0k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
32.0k
    int16_t *tmp                = tmp_array;
213
32.0k
    const pixel *src            = (const pixel*)_src;
214
32.0k
    pixel *dst                  = (pixel *)_dst;
215
32.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
32.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
32.0k
    const int8_t *filter        = hf;
218
32.0k
    const int shift             =  14 - BIT_DEPTH;
219
32.0k
#if BIT_DEPTH < 14
220
32.0k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
32.0k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
584k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
7.03M
        for (int x = 0; x < width; x++)
228
6.48M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
552k
        src += src_stride;
230
552k
        tmp += MAX_PB_SIZE;
231
552k
    }
232
233
32.0k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
32.0k
    filter = vf;
235
236
360k
    for (int y = 0; y < height; y++) {
237
4.69M
        for (int x = 0; x < width; x++) {
238
4.37M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
4.37M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
4.37M
        }
241
328k
        tmp += MAX_PB_SIZE;
242
328k
        dst += dst_stride;
243
328k
    }
244
245
32.0k
}
dsp.c:put_uni_luma_hv_12
Line
Count
Source
210
282k
{
211
282k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
282k
    int16_t *tmp                = tmp_array;
213
282k
    const pixel *src            = (const pixel*)_src;
214
282k
    pixel *dst                  = (pixel *)_dst;
215
282k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
282k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
282k
    const int8_t *filter        = hf;
218
282k
    const int shift             =  14 - BIT_DEPTH;
219
282k
#if BIT_DEPTH < 14
220
282k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
282k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
4.88M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
59.3M
        for (int x = 0; x < width; x++)
228
54.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
4.60M
        src += src_stride;
230
4.60M
        tmp += MAX_PB_SIZE;
231
4.60M
    }
232
233
282k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
282k
    filter = vf;
235
236
2.91M
    for (int y = 0; y < height; y++) {
237
40.3M
        for (int x = 0; x < width; x++) {
238
37.7M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
37.7M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
37.7M
        }
241
2.62M
        tmp += MAX_PB_SIZE;
242
2.62M
        dst += dst_stride;
243
2.62M
    }
244
245
282k
}
dsp.c:put_uni_luma_hv_8
Line
Count
Source
210
352k
{
211
352k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
352k
    int16_t *tmp                = tmp_array;
213
352k
    const pixel *src            = (const pixel*)_src;
214
352k
    pixel *dst                  = (pixel *)_dst;
215
352k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
352k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
352k
    const int8_t *filter        = hf;
218
352k
    const int shift             =  14 - BIT_DEPTH;
219
352k
#if BIT_DEPTH < 14
220
352k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
352k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
6.75M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
101M
        for (int x = 0; x < width; x++)
228
95.2M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
6.40M
        src += src_stride;
230
6.40M
        tmp += MAX_PB_SIZE;
231
6.40M
    }
232
233
352k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
352k
    filter = vf;
235
236
4.28M
    for (int y = 0; y < height; y++) {
237
73.0M
        for (int x = 0; x < width; x++) {
238
69.0M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
69.0M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
69.0M
        }
241
3.93M
        tmp += MAX_PB_SIZE;
242
3.93M
        dst += dst_stride;
243
3.93M
    }
244
245
352k
}
dsp.c:put_uni_luma_hv_12
Line
Count
Source
210
282k
{
211
282k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
282k
    int16_t *tmp                = tmp_array;
213
282k
    const pixel *src            = (const pixel*)_src;
214
282k
    pixel *dst                  = (pixel *)_dst;
215
282k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
282k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
282k
    const int8_t *filter        = hf;
218
282k
    const int shift             =  14 - BIT_DEPTH;
219
282k
#if BIT_DEPTH < 14
220
282k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
282k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
4.88M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
59.3M
        for (int x = 0; x < width; x++)
228
54.7M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
4.60M
        src += src_stride;
230
4.60M
        tmp += MAX_PB_SIZE;
231
4.60M
    }
232
233
282k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
282k
    filter = vf;
235
236
2.91M
    for (int y = 0; y < height; y++) {
237
40.3M
        for (int x = 0; x < width; x++) {
238
37.7M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
37.7M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
37.7M
        }
241
2.62M
        tmp += MAX_PB_SIZE;
242
2.62M
        dst += dst_stride;
243
2.62M
    }
244
245
282k
}
dsp.c:put_uni_luma_hv_10
Line
Count
Source
210
32.0k
{
211
32.0k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
32.0k
    int16_t *tmp                = tmp_array;
213
32.0k
    const pixel *src            = (const pixel*)_src;
214
32.0k
    pixel *dst                  = (pixel *)_dst;
215
32.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
32.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
32.0k
    const int8_t *filter        = hf;
218
32.0k
    const int shift             =  14 - BIT_DEPTH;
219
32.0k
#if BIT_DEPTH < 14
220
32.0k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
32.0k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
584k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
7.03M
        for (int x = 0; x < width; x++)
228
6.48M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
552k
        src += src_stride;
230
552k
        tmp += MAX_PB_SIZE;
231
552k
    }
232
233
32.0k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
32.0k
    filter = vf;
235
236
360k
    for (int y = 0; y < height; y++) {
237
4.69M
        for (int x = 0; x < width; x++) {
238
4.37M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
4.37M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
4.37M
        }
241
328k
        tmp += MAX_PB_SIZE;
242
328k
        dst += dst_stride;
243
328k
    }
244
245
32.0k
}
dsp.c:put_uni_luma_hv_8
Line
Count
Source
210
352k
{
211
352k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
212
352k
    int16_t *tmp                = tmp_array;
213
352k
    const pixel *src            = (const pixel*)_src;
214
352k
    pixel *dst                  = (pixel *)_dst;
215
352k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
216
352k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
217
352k
    const int8_t *filter        = hf;
218
352k
    const int shift             =  14 - BIT_DEPTH;
219
352k
#if BIT_DEPTH < 14
220
352k
    const int offset            = 1 << (shift - 1);
221
#else
222
    const int offset            = 0;
223
#endif
224
225
352k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
226
6.75M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
227
101M
        for (int x = 0; x < width; x++)
228
95.2M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
229
6.40M
        src += src_stride;
230
6.40M
        tmp += MAX_PB_SIZE;
231
6.40M
    }
232
233
352k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
234
352k
    filter = vf;
235
236
4.28M
    for (int y = 0; y < height; y++) {
237
73.0M
        for (int x = 0; x < width; x++) {
238
69.0M
            const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
239
69.0M
            dst[x]  = av_clip_pixel((val  + offset) >> shift);
240
69.0M
        }
241
3.93M
        tmp += MAX_PB_SIZE;
242
3.93M
        dst += dst_stride;
243
3.93M
    }
244
245
352k
}
246
247
static void FUNC(put_uni_luma_w_h)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
248
    const uint8_t *_src, const ptrdiff_t _src_stride, int height,
249
    const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf,
250
    const int width)
251
147k
{
252
147k
    const pixel *src            = (const pixel*)_src;
253
147k
    pixel *dst                  = (pixel *)_dst;
254
147k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
147k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
147k
    const int8_t *filter        = hf;
257
147k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
147k
    const int shift             = denom + 14 - BIT_DEPTH;
259
147k
#if BIT_DEPTH < 14
260
147k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
1.98M
    for (int y = 0; y < height; y++) {
266
38.8M
        for (int x = 0; x < width; x++)
267
37.0M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
1.83M
        src += src_stride;
269
1.83M
        dst += dst_stride;
270
1.83M
    }
271
147k
}
dsp.c:put_uni_luma_w_h_9
Line
Count
Source
251
10.1k
{
252
10.1k
    const pixel *src            = (const pixel*)_src;
253
10.1k
    pixel *dst                  = (pixel *)_dst;
254
10.1k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
10.1k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
10.1k
    const int8_t *filter        = hf;
257
10.1k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
10.1k
    const int shift             = denom + 14 - BIT_DEPTH;
259
10.1k
#if BIT_DEPTH < 14
260
10.1k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
204k
    for (int y = 0; y < height; y++) {
266
5.38M
        for (int x = 0; x < width; x++)
267
5.18M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
194k
        src += src_stride;
269
194k
        dst += dst_stride;
270
194k
    }
271
10.1k
}
dsp.c:put_uni_luma_w_h_10
Line
Count
Source
251
12.3k
{
252
12.3k
    const pixel *src            = (const pixel*)_src;
253
12.3k
    pixel *dst                  = (pixel *)_dst;
254
12.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
12.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
12.3k
    const int8_t *filter        = hf;
257
12.3k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
12.3k
    const int shift             = denom + 14 - BIT_DEPTH;
259
12.3k
#if BIT_DEPTH < 14
260
12.3k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
202k
    for (int y = 0; y < height; y++) {
266
4.98M
        for (int x = 0; x < width; x++)
267
4.79M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
190k
        src += src_stride;
269
190k
        dst += dst_stride;
270
190k
    }
271
12.3k
}
dsp.c:put_uni_luma_w_h_12
Line
Count
Source
251
30.9k
{
252
30.9k
    const pixel *src            = (const pixel*)_src;
253
30.9k
    pixel *dst                  = (pixel *)_dst;
254
30.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
30.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
30.9k
    const int8_t *filter        = hf;
257
30.9k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
30.9k
    const int shift             = denom + 14 - BIT_DEPTH;
259
30.9k
#if BIT_DEPTH < 14
260
30.9k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
394k
    for (int y = 0; y < height; y++) {
266
7.37M
        for (int x = 0; x < width; x++)
267
7.00M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
363k
        src += src_stride;
269
363k
        dst += dst_stride;
270
363k
    }
271
30.9k
}
dsp.c:put_uni_luma_w_h_8
Line
Count
Source
251
25.4k
{
252
25.4k
    const pixel *src            = (const pixel*)_src;
253
25.4k
    pixel *dst                  = (pixel *)_dst;
254
25.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
25.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
25.4k
    const int8_t *filter        = hf;
257
25.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
25.4k
    const int shift             = denom + 14 - BIT_DEPTH;
259
25.4k
#if BIT_DEPTH < 14
260
25.4k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
289k
    for (int y = 0; y < height; y++) {
266
4.36M
        for (int x = 0; x < width; x++)
267
4.10M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
264k
        src += src_stride;
269
264k
        dst += dst_stride;
270
264k
    }
271
25.4k
}
dsp.c:put_uni_luma_w_h_12
Line
Count
Source
251
30.9k
{
252
30.9k
    const pixel *src            = (const pixel*)_src;
253
30.9k
    pixel *dst                  = (pixel *)_dst;
254
30.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
30.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
30.9k
    const int8_t *filter        = hf;
257
30.9k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
30.9k
    const int shift             = denom + 14 - BIT_DEPTH;
259
30.9k
#if BIT_DEPTH < 14
260
30.9k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
394k
    for (int y = 0; y < height; y++) {
266
7.37M
        for (int x = 0; x < width; x++)
267
7.00M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
363k
        src += src_stride;
269
363k
        dst += dst_stride;
270
363k
    }
271
30.9k
}
dsp.c:put_uni_luma_w_h_10
Line
Count
Source
251
12.3k
{
252
12.3k
    const pixel *src            = (const pixel*)_src;
253
12.3k
    pixel *dst                  = (pixel *)_dst;
254
12.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
12.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
12.3k
    const int8_t *filter        = hf;
257
12.3k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
12.3k
    const int shift             = denom + 14 - BIT_DEPTH;
259
12.3k
#if BIT_DEPTH < 14
260
12.3k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
202k
    for (int y = 0; y < height; y++) {
266
4.98M
        for (int x = 0; x < width; x++)
267
4.79M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
190k
        src += src_stride;
269
190k
        dst += dst_stride;
270
190k
    }
271
12.3k
}
dsp.c:put_uni_luma_w_h_8
Line
Count
Source
251
25.4k
{
252
25.4k
    const pixel *src            = (const pixel*)_src;
253
25.4k
    pixel *dst                  = (pixel *)_dst;
254
25.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
255
25.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
256
25.4k
    const int8_t *filter        = hf;
257
25.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
258
25.4k
    const int shift             = denom + 14 - BIT_DEPTH;
259
25.4k
#if BIT_DEPTH < 14
260
25.4k
    const int offset            = 1 << (shift - 1);
261
#else
262
    const int offset            = 0;
263
#endif
264
265
289k
    for (int y = 0; y < height; y++) {
266
4.36M
        for (int x = 0; x < width; x++)
267
4.10M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
268
264k
        src += src_stride;
269
264k
        dst += dst_stride;
270
264k
    }
271
25.4k
}
272
273
static void FUNC(put_uni_luma_w_v)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
274
    const uint8_t *_src, const ptrdiff_t _src_stride, const int height,
275
    const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf,
276
    const int width)
277
150k
{
278
150k
    const pixel *src            = (const pixel*)_src;
279
150k
    pixel *dst                  = (pixel *)_dst;
280
150k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
150k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
150k
    const int8_t *filter        = vf;
283
150k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
150k
    const int shift             = denom + 14 - BIT_DEPTH;
285
150k
#if BIT_DEPTH < 14
286
150k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
2.04M
    for (int y = 0; y < height; y++) {
292
40.0M
        for (int x = 0; x < width; x++)
293
38.1M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
1.88M
        src += src_stride;
295
1.88M
        dst += dst_stride;
296
1.88M
    }
297
150k
}
dsp.c:put_uni_luma_w_v_9
Line
Count
Source
277
10.6k
{
278
10.6k
    const pixel *src            = (const pixel*)_src;
279
10.6k
    pixel *dst                  = (pixel *)_dst;
280
10.6k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
10.6k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
10.6k
    const int8_t *filter        = vf;
283
10.6k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
10.6k
    const int shift             = denom + 14 - BIT_DEPTH;
285
10.6k
#if BIT_DEPTH < 14
286
10.6k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
179k
    for (int y = 0; y < height; y++) {
292
4.20M
        for (int x = 0; x < width; x++)
293
4.03M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
169k
        src += src_stride;
295
169k
        dst += dst_stride;
296
169k
    }
297
10.6k
}
dsp.c:put_uni_luma_w_v_10
Line
Count
Source
277
15.2k
{
278
15.2k
    const pixel *src            = (const pixel*)_src;
279
15.2k
    pixel *dst                  = (pixel *)_dst;
280
15.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
15.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
15.2k
    const int8_t *filter        = vf;
283
15.2k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
15.2k
    const int shift             = denom + 14 - BIT_DEPTH;
285
15.2k
#if BIT_DEPTH < 14
286
15.2k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
230k
    for (int y = 0; y < height; y++) {
292
5.47M
        for (int x = 0; x < width; x++)
293
5.25M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
215k
        src += src_stride;
295
215k
        dst += dst_stride;
296
215k
    }
297
15.2k
}
dsp.c:put_uni_luma_w_v_12
Line
Count
Source
277
25.1k
{
278
25.1k
    const pixel *src            = (const pixel*)_src;
279
25.1k
    pixel *dst                  = (pixel *)_dst;
280
25.1k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
25.1k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
25.1k
    const int8_t *filter        = vf;
283
25.1k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
25.1k
    const int shift             = denom + 14 - BIT_DEPTH;
285
25.1k
#if BIT_DEPTH < 14
286
25.1k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
347k
    for (int y = 0; y < height; y++) {
292
6.90M
        for (int x = 0; x < width; x++)
293
6.58M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
322k
        src += src_stride;
295
322k
        dst += dst_stride;
296
322k
    }
297
25.1k
}
dsp.c:put_uni_luma_w_v_8
Line
Count
Source
277
29.7k
{
278
29.7k
    const pixel *src            = (const pixel*)_src;
279
29.7k
    pixel *dst                  = (pixel *)_dst;
280
29.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
29.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
29.7k
    const int8_t *filter        = vf;
283
29.7k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
29.7k
    const int shift             = denom + 14 - BIT_DEPTH;
285
29.7k
#if BIT_DEPTH < 14
286
29.7k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
352k
    for (int y = 0; y < height; y++) {
292
5.54M
        for (int x = 0; x < width; x++)
293
5.22M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
322k
        src += src_stride;
295
322k
        dst += dst_stride;
296
322k
    }
297
29.7k
}
dsp.c:put_uni_luma_w_v_12
Line
Count
Source
277
25.1k
{
278
25.1k
    const pixel *src            = (const pixel*)_src;
279
25.1k
    pixel *dst                  = (pixel *)_dst;
280
25.1k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
25.1k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
25.1k
    const int8_t *filter        = vf;
283
25.1k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
25.1k
    const int shift             = denom + 14 - BIT_DEPTH;
285
25.1k
#if BIT_DEPTH < 14
286
25.1k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
347k
    for (int y = 0; y < height; y++) {
292
6.90M
        for (int x = 0; x < width; x++)
293
6.58M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
322k
        src += src_stride;
295
322k
        dst += dst_stride;
296
322k
    }
297
25.1k
}
dsp.c:put_uni_luma_w_v_10
Line
Count
Source
277
15.2k
{
278
15.2k
    const pixel *src            = (const pixel*)_src;
279
15.2k
    pixel *dst                  = (pixel *)_dst;
280
15.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
15.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
15.2k
    const int8_t *filter        = vf;
283
15.2k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
15.2k
    const int shift             = denom + 14 - BIT_DEPTH;
285
15.2k
#if BIT_DEPTH < 14
286
15.2k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
230k
    for (int y = 0; y < height; y++) {
292
5.47M
        for (int x = 0; x < width; x++)
293
5.25M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
215k
        src += src_stride;
295
215k
        dst += dst_stride;
296
215k
    }
297
15.2k
}
dsp.c:put_uni_luma_w_v_8
Line
Count
Source
277
29.7k
{
278
29.7k
    const pixel *src            = (const pixel*)_src;
279
29.7k
    pixel *dst                  = (pixel *)_dst;
280
29.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
281
29.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
282
29.7k
    const int8_t *filter        = vf;
283
29.7k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
284
29.7k
    const int shift             = denom + 14 - BIT_DEPTH;
285
29.7k
#if BIT_DEPTH < 14
286
29.7k
    const int offset            = 1 << (shift - 1);
287
#else
288
    const int offset            = 0;
289
#endif
290
291
352k
    for (int y = 0; y < height; y++) {
292
5.54M
        for (int x = 0; x < width; x++)
293
5.22M
            dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
294
322k
        src += src_stride;
295
322k
        dst += dst_stride;
296
322k
    }
297
29.7k
}
298
299
static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst,  const ptrdiff_t _dst_stride,
300
    const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom,
301
    const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width)
302
433k
{
303
433k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
433k
    int16_t *tmp                = tmp_array;
305
433k
    const pixel *src            = (const pixel*)_src;
306
433k
    pixel *dst                  = (pixel *)_dst;
307
433k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
433k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
433k
    const int8_t *filter        = hf;
310
433k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
433k
    const int shift             = denom + 14 - BIT_DEPTH;
312
433k
#if BIT_DEPTH < 14
313
433k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
433k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
9.03M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
152M
        for (int x = 0; x < width; x++)
321
143M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
8.60M
        src += src_stride;
323
8.60M
        tmp += MAX_PB_SIZE;
324
8.60M
    }
325
326
433k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
433k
    filter = vf;
328
6.00M
    for (int y = 0; y < height; y++) {
329
110M
        for (int x = 0; x < width; x++)
330
105M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
5.56M
        tmp += MAX_PB_SIZE;
332
5.56M
        dst += dst_stride;
333
5.56M
    }
334
433k
}
dsp.c:put_uni_luma_w_hv_9
Line
Count
Source
302
18.0k
{
303
18.0k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
18.0k
    int16_t *tmp                = tmp_array;
305
18.0k
    const pixel *src            = (const pixel*)_src;
306
18.0k
    pixel *dst                  = (pixel *)_dst;
307
18.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
18.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
18.0k
    const int8_t *filter        = hf;
310
18.0k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
18.0k
    const int shift             = denom + 14 - BIT_DEPTH;
312
18.0k
#if BIT_DEPTH < 14
313
18.0k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
18.0k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
490k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
12.1M
        for (int x = 0; x < width; x++)
321
11.6M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
472k
        src += src_stride;
323
472k
        tmp += MAX_PB_SIZE;
324
472k
    }
325
326
18.0k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
18.0k
    filter = vf;
328
363k
    for (int y = 0; y < height; y++) {
329
9.48M
        for (int x = 0; x < width; x++)
330
9.14M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
345k
        tmp += MAX_PB_SIZE;
332
345k
        dst += dst_stride;
333
345k
    }
334
18.0k
}
dsp.c:put_uni_luma_w_hv_10
Line
Count
Source
302
28.7k
{
303
28.7k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
28.7k
    int16_t *tmp                = tmp_array;
305
28.7k
    const pixel *src            = (const pixel*)_src;
306
28.7k
    pixel *dst                  = (pixel *)_dst;
307
28.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
28.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
28.7k
    const int8_t *filter        = hf;
310
28.7k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
28.7k
    const int shift             = denom + 14 - BIT_DEPTH;
312
28.7k
#if BIT_DEPTH < 14
313
28.7k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
28.7k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
690k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
15.4M
        for (int x = 0; x < width; x++)
321
14.8M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
661k
        src += src_stride;
323
661k
        tmp += MAX_PB_SIZE;
324
661k
    }
325
326
28.7k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
28.7k
    filter = vf;
328
489k
    for (int y = 0; y < height; y++) {
329
12.1M
        for (int x = 0; x < width; x++)
330
11.7M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
460k
        tmp += MAX_PB_SIZE;
332
460k
        dst += dst_stride;
333
460k
    }
334
28.7k
}
dsp.c:put_uni_luma_w_hv_12
Line
Count
Source
302
92.4k
{
303
92.4k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
92.4k
    int16_t *tmp                = tmp_array;
305
92.4k
    const pixel *src            = (const pixel*)_src;
306
92.4k
    pixel *dst                  = (pixel *)_dst;
307
92.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
92.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
92.4k
    const int8_t *filter        = hf;
310
92.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
92.4k
    const int shift             = denom + 14 - BIT_DEPTH;
312
92.4k
#if BIT_DEPTH < 14
313
92.4k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
92.4k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
1.75M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
25.2M
        for (int x = 0; x < width; x++)
321
23.6M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
1.65M
        src += src_stride;
323
1.65M
        tmp += MAX_PB_SIZE;
324
1.65M
    }
325
326
92.4k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
92.4k
    filter = vf;
328
1.10M
    for (int y = 0; y < height; y++) {
329
17.6M
        for (int x = 0; x < width; x++)
330
16.6M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
1.01M
        tmp += MAX_PB_SIZE;
332
1.01M
        dst += dst_stride;
333
1.01M
    }
334
92.4k
}
dsp.c:put_uni_luma_w_hv_8
Line
Count
Source
302
86.3k
{
303
86.3k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
86.3k
    int16_t *tmp                = tmp_array;
305
86.3k
    const pixel *src            = (const pixel*)_src;
306
86.3k
    pixel *dst                  = (pixel *)_dst;
307
86.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
86.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
86.3k
    const int8_t *filter        = hf;
310
86.3k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
86.3k
    const int shift             = denom + 14 - BIT_DEPTH;
312
86.3k
#if BIT_DEPTH < 14
313
86.3k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
86.3k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
1.83M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
29.2M
        for (int x = 0; x < width; x++)
321
27.4M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
1.74M
        src += src_stride;
323
1.74M
        tmp += MAX_PB_SIZE;
324
1.74M
    }
325
326
86.3k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
86.3k
    filter = vf;
328
1.22M
    for (int y = 0; y < height; y++) {
329
20.8M
        for (int x = 0; x < width; x++)
330
19.6M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
1.13M
        tmp += MAX_PB_SIZE;
332
1.13M
        dst += dst_stride;
333
1.13M
    }
334
86.3k
}
dsp.c:put_uni_luma_w_hv_12
Line
Count
Source
302
92.4k
{
303
92.4k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
92.4k
    int16_t *tmp                = tmp_array;
305
92.4k
    const pixel *src            = (const pixel*)_src;
306
92.4k
    pixel *dst                  = (pixel *)_dst;
307
92.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
92.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
92.4k
    const int8_t *filter        = hf;
310
92.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
92.4k
    const int shift             = denom + 14 - BIT_DEPTH;
312
92.4k
#if BIT_DEPTH < 14
313
92.4k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
92.4k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
1.75M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
25.2M
        for (int x = 0; x < width; x++)
321
23.6M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
1.65M
        src += src_stride;
323
1.65M
        tmp += MAX_PB_SIZE;
324
1.65M
    }
325
326
92.4k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
92.4k
    filter = vf;
328
1.10M
    for (int y = 0; y < height; y++) {
329
17.6M
        for (int x = 0; x < width; x++)
330
16.6M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
1.01M
        tmp += MAX_PB_SIZE;
332
1.01M
        dst += dst_stride;
333
1.01M
    }
334
92.4k
}
dsp.c:put_uni_luma_w_hv_10
Line
Count
Source
302
28.7k
{
303
28.7k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
28.7k
    int16_t *tmp                = tmp_array;
305
28.7k
    const pixel *src            = (const pixel*)_src;
306
28.7k
    pixel *dst                  = (pixel *)_dst;
307
28.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
28.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
28.7k
    const int8_t *filter        = hf;
310
28.7k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
28.7k
    const int shift             = denom + 14 - BIT_DEPTH;
312
28.7k
#if BIT_DEPTH < 14
313
28.7k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
28.7k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
690k
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
15.4M
        for (int x = 0; x < width; x++)
321
14.8M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
661k
        src += src_stride;
323
661k
        tmp += MAX_PB_SIZE;
324
661k
    }
325
326
28.7k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
28.7k
    filter = vf;
328
489k
    for (int y = 0; y < height; y++) {
329
12.1M
        for (int x = 0; x < width; x++)
330
11.7M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
460k
        tmp += MAX_PB_SIZE;
332
460k
        dst += dst_stride;
333
460k
    }
334
28.7k
}
dsp.c:put_uni_luma_w_hv_8
Line
Count
Source
302
86.3k
{
303
86.3k
    int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE];
304
86.3k
    int16_t *tmp                = tmp_array;
305
86.3k
    const pixel *src            = (const pixel*)_src;
306
86.3k
    pixel *dst                  = (pixel *)_dst;
307
86.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
308
86.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
309
86.3k
    const int8_t *filter        = hf;
310
86.3k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
311
86.3k
    const int shift             = denom + 14 - BIT_DEPTH;
312
86.3k
#if BIT_DEPTH < 14
313
86.3k
    const int offset            = 1 << (shift - 1);
314
#else
315
    const int offset            = 0;
316
#endif
317
318
86.3k
    src   -= LUMA_EXTRA_BEFORE * src_stride;
319
1.83M
    for (int y = 0; y < height + LUMA_EXTRA; y++) {
320
29.2M
        for (int x = 0; x < width; x++)
321
27.4M
            tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
322
1.74M
        src += src_stride;
323
1.74M
        tmp += MAX_PB_SIZE;
324
1.74M
    }
325
326
86.3k
    tmp    = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE;
327
86.3k
    filter = vf;
328
1.22M
    for (int y = 0; y < height; y++) {
329
20.8M
        for (int x = 0; x < width; x++)
330
19.6M
            dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
331
1.13M
        tmp += MAX_PB_SIZE;
332
1.13M
        dst += dst_stride;
333
1.13M
    }
334
86.3k
}
335
336
#define CHROMA_FILTER(src, stride)                                             \
337
1.29G
    (filter[0] * src[x - stride] +                                             \
338
1.29G
     filter[1] * src[x]          +                                             \
339
1.29G
     filter[2] * src[x + stride] +                                             \
340
1.29G
     filter[3] * src[x + 2 * stride])
341
342
static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
343
    const int height, const int8_t *hf, const int8_t *vf, const int width)
344
614k
{
345
614k
    const pixel *src            = (const pixel *)_src;
346
614k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
614k
    const int8_t *filter        = hf;
348
349
7.71M
    for (int y = 0; y < height; y++) {
350
123M
        for (int x = 0; x < width; x++)
351
116M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
7.10M
        src += src_stride;
353
7.10M
        dst += MAX_PB_SIZE;
354
7.10M
    }
355
614k
}
dsp.c:put_chroma_h_9
Line
Count
Source
344
50.0k
{
345
50.0k
    const pixel *src            = (const pixel *)_src;
346
50.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
50.0k
    const int8_t *filter        = hf;
348
349
931k
    for (int y = 0; y < height; y++) {
350
13.6M
        for (int x = 0; x < width; x++)
351
12.7M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
881k
        src += src_stride;
353
881k
        dst += MAX_PB_SIZE;
354
881k
    }
355
50.0k
}
dsp.c:put_chroma_h_10
Line
Count
Source
344
36.3k
{
345
36.3k
    const pixel *src            = (const pixel *)_src;
346
36.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
36.3k
    const int8_t *filter        = hf;
348
349
661k
    for (int y = 0; y < height; y++) {
350
10.0M
        for (int x = 0; x < width; x++)
351
9.41M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
625k
        src += src_stride;
353
625k
        dst += MAX_PB_SIZE;
354
625k
    }
355
36.3k
}
dsp.c:put_chroma_h_12
Line
Count
Source
344
132k
{
345
132k
    const pixel *src            = (const pixel *)_src;
346
132k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
132k
    const int8_t *filter        = hf;
348
349
1.64M
    for (int y = 0; y < height; y++) {
350
31.9M
        for (int x = 0; x < width; x++)
351
30.3M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
1.51M
        src += src_stride;
353
1.51M
        dst += MAX_PB_SIZE;
354
1.51M
    }
355
132k
}
dsp.c:put_chroma_h_8
Line
Count
Source
344
113k
{
345
113k
    const pixel *src            = (const pixel *)_src;
346
113k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
113k
    const int8_t *filter        = hf;
348
349
1.08M
    for (int y = 0; y < height; y++) {
350
13.0M
        for (int x = 0; x < width; x++)
351
12.0M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
971k
        src += src_stride;
353
971k
        dst += MAX_PB_SIZE;
354
971k
    }
355
113k
}
dsp.c:put_chroma_h_12
Line
Count
Source
344
132k
{
345
132k
    const pixel *src            = (const pixel *)_src;
346
132k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
132k
    const int8_t *filter        = hf;
348
349
1.64M
    for (int y = 0; y < height; y++) {
350
31.9M
        for (int x = 0; x < width; x++)
351
30.3M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
1.51M
        src += src_stride;
353
1.51M
        dst += MAX_PB_SIZE;
354
1.51M
    }
355
132k
}
dsp.c:put_chroma_h_10
Line
Count
Source
344
36.3k
{
345
36.3k
    const pixel *src            = (const pixel *)_src;
346
36.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
36.3k
    const int8_t *filter        = hf;
348
349
661k
    for (int y = 0; y < height; y++) {
350
10.0M
        for (int x = 0; x < width; x++)
351
9.41M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
625k
        src += src_stride;
353
625k
        dst += MAX_PB_SIZE;
354
625k
    }
355
36.3k
}
dsp.c:put_chroma_h_8
Line
Count
Source
344
113k
{
345
113k
    const pixel *src            = (const pixel *)_src;
346
113k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
347
113k
    const int8_t *filter        = hf;
348
349
1.08M
    for (int y = 0; y < height; y++) {
350
13.0M
        for (int x = 0; x < width; x++)
351
12.0M
            dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
352
971k
        src += src_stride;
353
971k
        dst += MAX_PB_SIZE;
354
971k
    }
355
113k
}
356
357
static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
358
    const int height, const int8_t *hf, const int8_t *vf, const int width)
359
487k
{
360
487k
    const pixel *src            = (const pixel *)_src;
361
487k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
487k
    const int8_t *filter        = vf;
363
364
6.44M
    for (int y = 0; y < height; y++) {
365
113M
        for (int x = 0; x < width; x++)
366
107M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
5.95M
        src += src_stride;
368
5.95M
        dst += MAX_PB_SIZE;
369
5.95M
    }
370
487k
}
dsp.c:put_chroma_v_9
Line
Count
Source
359
65.1k
{
360
65.1k
    const pixel *src            = (const pixel *)_src;
361
65.1k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
65.1k
    const int8_t *filter        = vf;
363
364
1.34M
    for (int y = 0; y < height; y++) {
365
21.1M
        for (int x = 0; x < width; x++)
366
19.8M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
1.27M
        src += src_stride;
368
1.27M
        dst += MAX_PB_SIZE;
369
1.27M
    }
370
65.1k
}
dsp.c:put_chroma_v_10
Line
Count
Source
359
19.2k
{
360
19.2k
    const pixel *src            = (const pixel *)_src;
361
19.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
19.2k
    const int8_t *filter        = vf;
363
364
261k
    for (int y = 0; y < height; y++) {
365
4.68M
        for (int x = 0; x < width; x++)
366
4.44M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
242k
        src += src_stride;
368
242k
        dst += MAX_PB_SIZE;
369
242k
    }
370
19.2k
}
dsp.c:put_chroma_v_12
Line
Count
Source
359
104k
{
360
104k
    const pixel *src            = (const pixel *)_src;
361
104k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
104k
    const int8_t *filter        = vf;
363
364
1.42M
    for (int y = 0; y < height; y++) {
365
30.1M
        for (int x = 0; x < width; x++)
366
28.8M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
1.31M
        src += src_stride;
368
1.31M
        dst += MAX_PB_SIZE;
369
1.31M
    }
370
104k
}
dsp.c:put_chroma_v_8
Line
Count
Source
359
87.8k
{
360
87.8k
    const pixel *src            = (const pixel *)_src;
361
87.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
87.8k
    const int8_t *filter        = vf;
363
364
866k
    for (int y = 0; y < height; y++) {
365
11.2M
        for (int x = 0; x < width; x++)
366
10.4M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
778k
        src += src_stride;
368
778k
        dst += MAX_PB_SIZE;
369
778k
    }
370
87.8k
}
dsp.c:put_chroma_v_12
Line
Count
Source
359
104k
{
360
104k
    const pixel *src            = (const pixel *)_src;
361
104k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
104k
    const int8_t *filter        = vf;
363
364
1.42M
    for (int y = 0; y < height; y++) {
365
30.1M
        for (int x = 0; x < width; x++)
366
28.8M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
1.31M
        src += src_stride;
368
1.31M
        dst += MAX_PB_SIZE;
369
1.31M
    }
370
104k
}
dsp.c:put_chroma_v_10
Line
Count
Source
359
19.2k
{
360
19.2k
    const pixel *src            = (const pixel *)_src;
361
19.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
19.2k
    const int8_t *filter        = vf;
363
364
261k
    for (int y = 0; y < height; y++) {
365
4.68M
        for (int x = 0; x < width; x++)
366
4.44M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
242k
        src += src_stride;
368
242k
        dst += MAX_PB_SIZE;
369
242k
    }
370
19.2k
}
dsp.c:put_chroma_v_8
Line
Count
Source
359
87.8k
{
360
87.8k
    const pixel *src            = (const pixel *)_src;
361
87.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
362
87.8k
    const int8_t *filter        = vf;
363
364
866k
    for (int y = 0; y < height; y++) {
365
11.2M
        for (int x = 0; x < width; x++)
366
10.4M
            dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8);
367
778k
        src += src_stride;
368
778k
        dst += MAX_PB_SIZE;
369
778k
    }
370
87.8k
}
371
372
static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
373
    const int height, const int8_t *hf, const int8_t *vf, const int width)
374
1.61M
{
375
1.61M
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
1.61M
    int16_t *tmp                = tmp_array;
377
1.61M
    const pixel *src            = (const pixel *)_src;
378
1.61M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
1.61M
    const int8_t *filter        = hf;
380
381
1.61M
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
24.3M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
377M
        for (int x = 0; x < width; x++)
385
354M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
22.7M
        src += src_stride;
387
22.7M
        tmp += MAX_PB_SIZE;
388
22.7M
    }
389
390
1.61M
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
1.61M
    filter = vf;
392
393
19.5M
    for (int y = 0; y < height; y++) {
394
323M
        for (int x = 0; x < width; x++)
395
306M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
17.9M
        tmp += MAX_PB_SIZE;
397
17.9M
        dst += MAX_PB_SIZE;
398
17.9M
    }
399
1.61M
}
dsp.c:put_chroma_hv_9
Line
Count
Source
374
143k
{
375
143k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
143k
    int16_t *tmp                = tmp_array;
377
143k
    const pixel *src            = (const pixel *)_src;
378
143k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
143k
    const int8_t *filter        = hf;
380
381
143k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
3.52M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
52.9M
        for (int x = 0; x < width; x++)
385
49.5M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
3.38M
        src += src_stride;
387
3.38M
        tmp += MAX_PB_SIZE;
388
3.38M
    }
389
390
143k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
143k
    filter = vf;
392
393
3.09M
    for (int y = 0; y < height; y++) {
394
47.3M
        for (int x = 0; x < width; x++)
395
44.4M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
2.95M
        tmp += MAX_PB_SIZE;
397
2.95M
        dst += MAX_PB_SIZE;
398
2.95M
    }
399
143k
}
dsp.c:put_chroma_hv_10
Line
Count
Source
374
69.9k
{
375
69.9k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
69.9k
    int16_t *tmp                = tmp_array;
377
69.9k
    const pixel *src            = (const pixel *)_src;
378
69.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
69.9k
    const int8_t *filter        = hf;
380
381
69.9k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
1.18M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
19.4M
        for (int x = 0; x < width; x++)
385
18.3M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
1.11M
        src += src_stride;
387
1.11M
        tmp += MAX_PB_SIZE;
388
1.11M
    }
389
390
69.9k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
69.9k
    filter = vf;
392
393
976k
    for (int y = 0; y < height; y++) {
394
16.9M
        for (int x = 0; x < width; x++)
395
16.0M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
906k
        tmp += MAX_PB_SIZE;
397
906k
        dst += MAX_PB_SIZE;
398
906k
    }
399
69.9k
}
dsp.c:put_chroma_hv_12
Line
Count
Source
374
304k
{
375
304k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
304k
    int16_t *tmp                = tmp_array;
377
304k
    const pixel *src            = (const pixel *)_src;
378
304k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
304k
    const int8_t *filter        = hf;
380
381
304k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
4.88M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
96.7M
        for (int x = 0; x < width; x++)
385
92.2M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
4.57M
        src += src_stride;
387
4.57M
        tmp += MAX_PB_SIZE;
388
4.57M
    }
389
390
304k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
304k
    filter = vf;
392
393
3.96M
    for (int y = 0; y < height; y++) {
394
85.0M
        for (int x = 0; x < width; x++)
395
81.3M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
3.66M
        tmp += MAX_PB_SIZE;
397
3.66M
        dst += MAX_PB_SIZE;
398
3.66M
    }
399
304k
}
dsp.c:put_chroma_hv_8
Line
Count
Source
374
359k
{
375
359k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
359k
    int16_t *tmp                = tmp_array;
377
359k
    const pixel *src            = (const pixel *)_src;
378
359k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
359k
    const int8_t *filter        = hf;
380
381
359k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
4.35M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
45.7M
        for (int x = 0; x < width; x++)
385
41.8M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
3.99M
        src += src_stride;
387
3.99M
        tmp += MAX_PB_SIZE;
388
3.99M
    }
389
390
359k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
359k
    filter = vf;
392
393
3.27M
    for (int y = 0; y < height; y++) {
394
36.3M
        for (int x = 0; x < width; x++)
395
33.4M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
2.91M
        tmp += MAX_PB_SIZE;
397
2.91M
        dst += MAX_PB_SIZE;
398
2.91M
    }
399
359k
}
dsp.c:put_chroma_hv_12
Line
Count
Source
374
304k
{
375
304k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
304k
    int16_t *tmp                = tmp_array;
377
304k
    const pixel *src            = (const pixel *)_src;
378
304k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
304k
    const int8_t *filter        = hf;
380
381
304k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
4.88M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
96.7M
        for (int x = 0; x < width; x++)
385
92.2M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
4.57M
        src += src_stride;
387
4.57M
        tmp += MAX_PB_SIZE;
388
4.57M
    }
389
390
304k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
304k
    filter = vf;
392
393
3.96M
    for (int y = 0; y < height; y++) {
394
85.0M
        for (int x = 0; x < width; x++)
395
81.3M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
3.66M
        tmp += MAX_PB_SIZE;
397
3.66M
        dst += MAX_PB_SIZE;
398
3.66M
    }
399
304k
}
dsp.c:put_chroma_hv_10
Line
Count
Source
374
69.9k
{
375
69.9k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
69.9k
    int16_t *tmp                = tmp_array;
377
69.9k
    const pixel *src            = (const pixel *)_src;
378
69.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
69.9k
    const int8_t *filter        = hf;
380
381
69.9k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
1.18M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
19.4M
        for (int x = 0; x < width; x++)
385
18.3M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
1.11M
        src += src_stride;
387
1.11M
        tmp += MAX_PB_SIZE;
388
1.11M
    }
389
390
69.9k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
69.9k
    filter = vf;
392
393
976k
    for (int y = 0; y < height; y++) {
394
16.9M
        for (int x = 0; x < width; x++)
395
16.0M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
906k
        tmp += MAX_PB_SIZE;
397
906k
        dst += MAX_PB_SIZE;
398
906k
    }
399
69.9k
}
dsp.c:put_chroma_hv_8
Line
Count
Source
374
359k
{
375
359k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
376
359k
    int16_t *tmp                = tmp_array;
377
359k
    const pixel *src            = (const pixel *)_src;
378
359k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
379
359k
    const int8_t *filter        = hf;
380
381
359k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
382
383
4.35M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
384
45.7M
        for (int x = 0; x < width; x++)
385
41.8M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
386
3.99M
        src += src_stride;
387
3.99M
        tmp += MAX_PB_SIZE;
388
3.99M
    }
389
390
359k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
391
359k
    filter = vf;
392
393
3.27M
    for (int y = 0; y < height; y++) {
394
36.3M
        for (int x = 0; x < width; x++)
395
33.4M
            dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6;
396
2.91M
        tmp += MAX_PB_SIZE;
397
2.91M
        dst += MAX_PB_SIZE;
398
2.91M
    }
399
359k
}
400
401
static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride,
402
    const uint8_t *_src, const ptrdiff_t _src_stride,
403
    const int height, const int8_t *hf, const int8_t *vf, const int width)
404
1.00M
{
405
1.00M
    const pixel *src            = (const pixel *)_src;
406
1.00M
    pixel *dst                  = (pixel *)_dst;
407
1.00M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
1.00M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
1.00M
    const int8_t *filter        = hf;
410
1.00M
    const int shift             = 14 - BIT_DEPTH;
411
1.00M
#if BIT_DEPTH < 14
412
1.00M
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
9.37M
    for (int y = 0; y < height; y++) {
418
135M
        for (int x = 0; x < width; x++)
419
126M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
8.37M
        src += src_stride;
421
8.37M
        dst += dst_stride;
422
8.37M
    }
423
1.00M
}
dsp.c:put_uni_chroma_h_9
Line
Count
Source
404
26.3k
{
405
26.3k
    const pixel *src            = (const pixel *)_src;
406
26.3k
    pixel *dst                  = (pixel *)_dst;
407
26.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
26.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
26.3k
    const int8_t *filter        = hf;
410
26.3k
    const int shift             = 14 - BIT_DEPTH;
411
26.3k
#if BIT_DEPTH < 14
412
26.3k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
320k
    for (int y = 0; y < height; y++) {
418
3.85M
        for (int x = 0; x < width; x++)
419
3.56M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
294k
        src += src_stride;
421
294k
        dst += dst_stride;
422
294k
    }
423
26.3k
}
dsp.c:put_uni_chroma_h_10
Line
Count
Source
404
35.0k
{
405
35.0k
    const pixel *src            = (const pixel *)_src;
406
35.0k
    pixel *dst                  = (pixel *)_dst;
407
35.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
35.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
35.0k
    const int8_t *filter        = hf;
410
35.0k
    const int shift             = 14 - BIT_DEPTH;
411
35.0k
#if BIT_DEPTH < 14
412
35.0k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
268k
    for (int y = 0; y < height; y++) {
418
2.08M
        for (int x = 0; x < width; x++)
419
1.85M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
233k
        src += src_stride;
421
233k
        dst += dst_stride;
422
233k
    }
423
35.0k
}
dsp.c:put_uni_chroma_h_12
Line
Count
Source
404
193k
{
405
193k
    const pixel *src            = (const pixel *)_src;
406
193k
    pixel *dst                  = (pixel *)_dst;
407
193k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
193k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
193k
    const int8_t *filter        = hf;
410
193k
    const int shift             = 14 - BIT_DEPTH;
411
193k
#if BIT_DEPTH < 14
412
193k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
2.34M
    for (int y = 0; y < height; y++) {
418
44.4M
        for (int x = 0; x < width; x++)
419
42.3M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
2.14M
        src += src_stride;
421
2.14M
        dst += dst_stride;
422
2.14M
    }
423
193k
}
dsp.c:put_uni_chroma_h_8
Line
Count
Source
404
259k
{
405
259k
    const pixel *src            = (const pixel *)_src;
406
259k
    pixel *dst                  = (pixel *)_dst;
407
259k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
259k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
259k
    const int8_t *filter        = hf;
410
259k
    const int shift             = 14 - BIT_DEPTH;
411
259k
#if BIT_DEPTH < 14
412
259k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
1.91M
    for (int y = 0; y < height; y++) {
418
19.1M
        for (int x = 0; x < width; x++)
419
17.4M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
1.65M
        src += src_stride;
421
1.65M
        dst += dst_stride;
422
1.65M
    }
423
259k
}
dsp.c:put_uni_chroma_h_12
Line
Count
Source
404
193k
{
405
193k
    const pixel *src            = (const pixel *)_src;
406
193k
    pixel *dst                  = (pixel *)_dst;
407
193k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
193k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
193k
    const int8_t *filter        = hf;
410
193k
    const int shift             = 14 - BIT_DEPTH;
411
193k
#if BIT_DEPTH < 14
412
193k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
2.34M
    for (int y = 0; y < height; y++) {
418
44.4M
        for (int x = 0; x < width; x++)
419
42.3M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
2.14M
        src += src_stride;
421
2.14M
        dst += dst_stride;
422
2.14M
    }
423
193k
}
dsp.c:put_uni_chroma_h_10
Line
Count
Source
404
35.0k
{
405
35.0k
    const pixel *src            = (const pixel *)_src;
406
35.0k
    pixel *dst                  = (pixel *)_dst;
407
35.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
35.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
35.0k
    const int8_t *filter        = hf;
410
35.0k
    const int shift             = 14 - BIT_DEPTH;
411
35.0k
#if BIT_DEPTH < 14
412
35.0k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
268k
    for (int y = 0; y < height; y++) {
418
2.08M
        for (int x = 0; x < width; x++)
419
1.85M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
233k
        src += src_stride;
421
233k
        dst += dst_stride;
422
233k
    }
423
35.0k
}
dsp.c:put_uni_chroma_h_8
Line
Count
Source
404
259k
{
405
259k
    const pixel *src            = (const pixel *)_src;
406
259k
    pixel *dst                  = (pixel *)_dst;
407
259k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
408
259k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
409
259k
    const int8_t *filter        = hf;
410
259k
    const int shift             = 14 - BIT_DEPTH;
411
259k
#if BIT_DEPTH < 14
412
259k
    const int offset            = 1 << (shift - 1);
413
#else
414
    const int offset            = 0;
415
#endif
416
417
1.91M
    for (int y = 0; y < height; y++) {
418
19.1M
        for (int x = 0; x < width; x++)
419
17.4M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
420
1.65M
        src += src_stride;
421
1.65M
        dst += dst_stride;
422
1.65M
    }
423
259k
}
424
425
static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride,
426
    const uint8_t *_src, const ptrdiff_t _src_stride,
427
    const int height, const int8_t *hf, const int8_t *vf, const int width)
428
1.03M
{
429
1.03M
    const pixel *src            = (const pixel *)_src;
430
1.03M
    pixel *dst                  = (pixel *)_dst;
431
1.03M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
1.03M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
1.03M
    const int8_t *filter        = vf;
434
1.03M
    const int shift             = 14 - BIT_DEPTH;
435
1.03M
#if BIT_DEPTH < 14
436
1.03M
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
8.45M
    for (int y = 0; y < height; y++) {
442
89.8M
        for (int x = 0; x < width; x++)
443
82.4M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
7.42M
        src += src_stride;
445
7.42M
        dst += dst_stride;
446
7.42M
    }
447
1.03M
}
dsp.c:put_uni_chroma_v_9
Line
Count
Source
428
38.5k
{
429
38.5k
    const pixel *src            = (const pixel *)_src;
430
38.5k
    pixel *dst                  = (pixel *)_dst;
431
38.5k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
38.5k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
38.5k
    const int8_t *filter        = vf;
434
38.5k
    const int shift             = 14 - BIT_DEPTH;
435
38.5k
#if BIT_DEPTH < 14
436
38.5k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
307k
    for (int y = 0; y < height; y++) {
442
2.89M
        for (int x = 0; x < width; x++)
443
2.62M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
268k
        src += src_stride;
445
268k
        dst += dst_stride;
446
268k
    }
447
38.5k
}
dsp.c:put_uni_chroma_v_10
Line
Count
Source
428
62.7k
{
429
62.7k
    const pixel *src            = (const pixel *)_src;
430
62.7k
    pixel *dst                  = (pixel *)_dst;
431
62.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
62.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
62.7k
    const int8_t *filter        = vf;
434
62.7k
    const int shift             = 14 - BIT_DEPTH;
435
62.7k
#if BIT_DEPTH < 14
436
62.7k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
389k
    for (int y = 0; y < height; y++) {
442
2.88M
        for (int x = 0; x < width; x++)
443
2.56M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
327k
        src += src_stride;
445
327k
        dst += dst_stride;
446
327k
    }
447
62.7k
}
dsp.c:put_uni_chroma_v_12
Line
Count
Source
428
238k
{
429
238k
    const pixel *src            = (const pixel *)_src;
430
238k
    pixel *dst                  = (pixel *)_dst;
431
238k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
238k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
238k
    const int8_t *filter        = vf;
434
238k
    const int shift             = 14 - BIT_DEPTH;
435
238k
#if BIT_DEPTH < 14
436
238k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
2.33M
    for (int y = 0; y < height; y++) {
442
27.8M
        for (int x = 0; x < width; x++)
443
25.7M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
2.09M
        src += src_stride;
445
2.09M
        dst += dst_stride;
446
2.09M
    }
447
238k
}
dsp.c:put_uni_chroma_v_8
Line
Count
Source
428
196k
{
429
196k
    const pixel *src            = (const pixel *)_src;
430
196k
    pixel *dst                  = (pixel *)_dst;
431
196k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
196k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
196k
    const int8_t *filter        = vf;
434
196k
    const int shift             = 14 - BIT_DEPTH;
435
196k
#if BIT_DEPTH < 14
436
196k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
1.35M
    for (int y = 0; y < height; y++) {
442
12.7M
        for (int x = 0; x < width; x++)
443
11.5M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
1.15M
        src += src_stride;
445
1.15M
        dst += dst_stride;
446
1.15M
    }
447
196k
}
dsp.c:put_uni_chroma_v_12
Line
Count
Source
428
238k
{
429
238k
    const pixel *src            = (const pixel *)_src;
430
238k
    pixel *dst                  = (pixel *)_dst;
431
238k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
238k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
238k
    const int8_t *filter        = vf;
434
238k
    const int shift             = 14 - BIT_DEPTH;
435
238k
#if BIT_DEPTH < 14
436
238k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
2.33M
    for (int y = 0; y < height; y++) {
442
27.8M
        for (int x = 0; x < width; x++)
443
25.7M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
2.09M
        src += src_stride;
445
2.09M
        dst += dst_stride;
446
2.09M
    }
447
238k
}
dsp.c:put_uni_chroma_v_10
Line
Count
Source
428
62.7k
{
429
62.7k
    const pixel *src            = (const pixel *)_src;
430
62.7k
    pixel *dst                  = (pixel *)_dst;
431
62.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
62.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
62.7k
    const int8_t *filter        = vf;
434
62.7k
    const int shift             = 14 - BIT_DEPTH;
435
62.7k
#if BIT_DEPTH < 14
436
62.7k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
389k
    for (int y = 0; y < height; y++) {
442
2.88M
        for (int x = 0; x < width; x++)
443
2.56M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
327k
        src += src_stride;
445
327k
        dst += dst_stride;
446
327k
    }
447
62.7k
}
dsp.c:put_uni_chroma_v_8
Line
Count
Source
428
196k
{
429
196k
    const pixel *src            = (const pixel *)_src;
430
196k
    pixel *dst                  = (pixel *)_dst;
431
196k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
432
196k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
433
196k
    const int8_t *filter        = vf;
434
196k
    const int shift             = 14 - BIT_DEPTH;
435
196k
#if BIT_DEPTH < 14
436
196k
    const int offset            = 1 << (shift - 1);
437
#else
438
    const int offset            = 0;
439
#endif
440
441
1.35M
    for (int y = 0; y < height; y++) {
442
12.7M
        for (int x = 0; x < width; x++)
443
11.5M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift);
444
1.15M
        src += src_stride;
445
1.15M
        dst += dst_stride;
446
1.15M
    }
447
196k
}
448
449
static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride,
450
    const uint8_t *_src, const ptrdiff_t _src_stride,
451
    const int height, const int8_t *hf, const int8_t *vf, const int width)
452
3.17M
{
453
3.17M
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
3.17M
    int16_t *tmp                = tmp_array;
455
3.17M
    const pixel *src            = (const pixel *)_src;
456
3.17M
    pixel *dst                  = (pixel *)_dst;
457
3.17M
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
3.17M
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
3.17M
    const int8_t *filter        = hf;
460
3.17M
    const int shift             = 14 - BIT_DEPTH;
461
3.17M
#if BIT_DEPTH < 14
462
3.17M
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
3.17M
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
34.7M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
342M
        for (int x = 0; x < width; x++)
471
311M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
31.6M
        src += src_stride;
473
31.6M
        tmp += MAX_PB_SIZE;
474
31.6M
    }
475
476
3.17M
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
3.17M
    filter = vf;
478
479
25.2M
    for (int y = 0; y < height; y++) {
480
272M
        for (int x = 0; x < width; x++)
481
249M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
22.1M
        tmp += MAX_PB_SIZE;
483
22.1M
        dst += dst_stride;
484
22.1M
    }
485
3.17M
}
dsp.c:put_uni_chroma_hv_9
Line
Count
Source
452
74.7k
{
453
74.7k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
74.7k
    int16_t *tmp                = tmp_array;
455
74.7k
    const pixel *src            = (const pixel *)_src;
456
74.7k
    pixel *dst                  = (pixel *)_dst;
457
74.7k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
74.7k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
74.7k
    const int8_t *filter        = hf;
460
74.7k
    const int shift             = 14 - BIT_DEPTH;
461
74.7k
#if BIT_DEPTH < 14
462
74.7k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
74.7k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
963k
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
9.33M
        for (int x = 0; x < width; x++)
471
8.44M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
889k
        src += src_stride;
473
889k
        tmp += MAX_PB_SIZE;
474
889k
    }
475
476
74.7k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
74.7k
    filter = vf;
478
479
739k
    for (int y = 0; y < height; y++) {
480
7.72M
        for (int x = 0; x < width; x++)
481
7.06M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
664k
        tmp += MAX_PB_SIZE;
483
664k
        dst += dst_stride;
484
664k
    }
485
74.7k
}
dsp.c:put_uni_chroma_hv_10
Line
Count
Source
452
77.3k
{
453
77.3k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
77.3k
    int16_t *tmp                = tmp_array;
455
77.3k
    const pixel *src            = (const pixel *)_src;
456
77.3k
    pixel *dst                  = (pixel *)_dst;
457
77.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
77.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
77.3k
    const int8_t *filter        = hf;
460
77.3k
    const int shift             = 14 - BIT_DEPTH;
461
77.3k
#if BIT_DEPTH < 14
462
77.3k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
77.3k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
766k
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
4.83M
        for (int x = 0; x < width; x++)
471
4.14M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
689k
        src += src_stride;
473
689k
        tmp += MAX_PB_SIZE;
474
689k
    }
475
476
77.3k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
77.3k
    filter = vf;
478
479
534k
    for (int y = 0; y < height; y++) {
480
3.49M
        for (int x = 0; x < width; x++)
481
3.04M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
457k
        tmp += MAX_PB_SIZE;
483
457k
        dst += dst_stride;
484
457k
    }
485
77.3k
}
dsp.c:put_uni_chroma_hv_12
Line
Count
Source
452
563k
{
453
563k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
563k
    int16_t *tmp                = tmp_array;
455
563k
    const pixel *src            = (const pixel *)_src;
456
563k
    pixel *dst                  = (pixel *)_dst;
457
563k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
563k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
563k
    const int8_t *filter        = hf;
460
563k
    const int shift             = 14 - BIT_DEPTH;
461
563k
#if BIT_DEPTH < 14
462
563k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
563k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
7.50M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
96.6M
        for (int x = 0; x < width; x++)
471
89.7M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
6.93M
        src += src_stride;
473
6.93M
        tmp += MAX_PB_SIZE;
474
6.93M
    }
475
476
563k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
563k
    filter = vf;
478
479
5.80M
    for (int y = 0; y < height; y++) {
480
80.4M
        for (int x = 0; x < width; x++)
481
75.2M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
5.24M
        tmp += MAX_PB_SIZE;
483
5.24M
        dst += dst_stride;
484
5.24M
    }
485
563k
}
dsp.c:put_uni_chroma_hv_8
Line
Count
Source
452
907k
{
453
907k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
907k
    int16_t *tmp                = tmp_array;
455
907k
    const pixel *src            = (const pixel *)_src;
456
907k
    pixel *dst                  = (pixel *)_dst;
457
907k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
907k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
907k
    const int8_t *filter        = hf;
460
907k
    const int shift             = 14 - BIT_DEPTH;
461
907k
#if BIT_DEPTH < 14
462
907k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
907k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
8.65M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
65.2M
        for (int x = 0; x < width; x++)
471
57.4M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
7.74M
        src += src_stride;
473
7.74M
        tmp += MAX_PB_SIZE;
474
7.74M
    }
475
476
907k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
907k
    filter = vf;
478
479
5.92M
    for (int y = 0; y < height; y++) {
480
48.1M
        for (int x = 0; x < width; x++)
481
43.1M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
5.02M
        tmp += MAX_PB_SIZE;
483
5.02M
        dst += dst_stride;
484
5.02M
    }
485
907k
}
dsp.c:put_uni_chroma_hv_12
Line
Count
Source
452
563k
{
453
563k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
563k
    int16_t *tmp                = tmp_array;
455
563k
    const pixel *src            = (const pixel *)_src;
456
563k
    pixel *dst                  = (pixel *)_dst;
457
563k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
563k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
563k
    const int8_t *filter        = hf;
460
563k
    const int shift             = 14 - BIT_DEPTH;
461
563k
#if BIT_DEPTH < 14
462
563k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
563k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
7.50M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
96.6M
        for (int x = 0; x < width; x++)
471
89.7M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
6.93M
        src += src_stride;
473
6.93M
        tmp += MAX_PB_SIZE;
474
6.93M
    }
475
476
563k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
563k
    filter = vf;
478
479
5.80M
    for (int y = 0; y < height; y++) {
480
80.4M
        for (int x = 0; x < width; x++)
481
75.2M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
5.24M
        tmp += MAX_PB_SIZE;
483
5.24M
        dst += dst_stride;
484
5.24M
    }
485
563k
}
dsp.c:put_uni_chroma_hv_10
Line
Count
Source
452
77.3k
{
453
77.3k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
77.3k
    int16_t *tmp                = tmp_array;
455
77.3k
    const pixel *src            = (const pixel *)_src;
456
77.3k
    pixel *dst                  = (pixel *)_dst;
457
77.3k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
77.3k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
77.3k
    const int8_t *filter        = hf;
460
77.3k
    const int shift             = 14 - BIT_DEPTH;
461
77.3k
#if BIT_DEPTH < 14
462
77.3k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
77.3k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
766k
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
4.83M
        for (int x = 0; x < width; x++)
471
4.14M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
689k
        src += src_stride;
473
689k
        tmp += MAX_PB_SIZE;
474
689k
    }
475
476
77.3k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
77.3k
    filter = vf;
478
479
534k
    for (int y = 0; y < height; y++) {
480
3.49M
        for (int x = 0; x < width; x++)
481
3.04M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
457k
        tmp += MAX_PB_SIZE;
483
457k
        dst += dst_stride;
484
457k
    }
485
77.3k
}
dsp.c:put_uni_chroma_hv_8
Line
Count
Source
452
907k
{
453
907k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
454
907k
    int16_t *tmp                = tmp_array;
455
907k
    const pixel *src            = (const pixel *)_src;
456
907k
    pixel *dst                  = (pixel *)_dst;
457
907k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
458
907k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
459
907k
    const int8_t *filter        = hf;
460
907k
    const int shift             = 14 - BIT_DEPTH;
461
907k
#if BIT_DEPTH < 14
462
907k
    const int offset            = 1 << (shift - 1);
463
#else
464
    const int offset            = 0;
465
#endif
466
467
907k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
468
469
8.65M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
470
65.2M
        for (int x = 0; x < width; x++)
471
57.4M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
472
7.74M
        src += src_stride;
473
7.74M
        tmp += MAX_PB_SIZE;
474
7.74M
    }
475
476
907k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
477
907k
    filter = vf;
478
479
5.92M
    for (int y = 0; y < height; y++) {
480
48.1M
        for (int x = 0; x < width; x++)
481
43.1M
            dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
482
5.02M
        tmp += MAX_PB_SIZE;
483
5.02M
        dst += dst_stride;
484
5.02M
    }
485
907k
}
486
487
static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride,
488
    const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox,
489
    const int8_t *hf, const int8_t *vf, int width)
490
275k
{
491
275k
    const pixel *src            = (const pixel *)_src;
492
275k
    pixel *dst                  = (pixel *)_dst;
493
275k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
275k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
275k
    const int8_t *filter        = hf;
496
275k
    const int shift             = denom + 14 - BIT_DEPTH;
497
275k
#if BIT_DEPTH < 14
498
275k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
275k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
2.60M
    for (int y = 0; y < height; y++) {
505
31.9M
        for (int x = 0; x < width; x++) {
506
29.6M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
29.6M
        }
508
2.32M
        dst += dst_stride;
509
2.32M
        src += src_stride;
510
2.32M
    }
511
275k
}
dsp.c:put_uni_chroma_w_h_9
Line
Count
Source
490
20.0k
{
491
20.0k
    const pixel *src            = (const pixel *)_src;
492
20.0k
    pixel *dst                  = (pixel *)_dst;
493
20.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
20.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
20.0k
    const int8_t *filter        = hf;
496
20.0k
    const int shift             = denom + 14 - BIT_DEPTH;
497
20.0k
#if BIT_DEPTH < 14
498
20.0k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
20.0k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
403k
    for (int y = 0; y < height; y++) {
505
6.15M
        for (int x = 0; x < width; x++) {
506
5.77M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
5.77M
        }
508
383k
        dst += dst_stride;
509
383k
        src += src_stride;
510
383k
    }
511
20.0k
}
dsp.c:put_uni_chroma_w_h_10
Line
Count
Source
490
22.4k
{
491
22.4k
    const pixel *src            = (const pixel *)_src;
492
22.4k
    pixel *dst                  = (pixel *)_dst;
493
22.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
22.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
22.4k
    const int8_t *filter        = hf;
496
22.4k
    const int shift             = denom + 14 - BIT_DEPTH;
497
22.4k
#if BIT_DEPTH < 14
498
22.4k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
22.4k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
256k
    for (int y = 0; y < height; y++) {
505
2.81M
        for (int x = 0; x < width; x++) {
506
2.57M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
2.57M
        }
508
233k
        dst += dst_stride;
509
233k
        src += src_stride;
510
233k
    }
511
22.4k
}
dsp.c:put_uni_chroma_w_h_12
Line
Count
Source
490
59.2k
{
491
59.2k
    const pixel *src            = (const pixel *)_src;
492
59.2k
    pixel *dst                  = (pixel *)_dst;
493
59.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
59.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
59.2k
    const int8_t *filter        = hf;
496
59.2k
    const int shift             = denom + 14 - BIT_DEPTH;
497
59.2k
#if BIT_DEPTH < 14
498
59.2k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
59.2k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
546k
    for (int y = 0; y < height; y++) {
505
7.63M
        for (int x = 0; x < width; x++) {
506
7.14M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
7.14M
        }
508
487k
        dst += dst_stride;
509
487k
        src += src_stride;
510
487k
    }
511
59.2k
}
dsp.c:put_uni_chroma_w_h_8
Line
Count
Source
490
45.9k
{
491
45.9k
    const pixel *src            = (const pixel *)_src;
492
45.9k
    pixel *dst                  = (pixel *)_dst;
493
45.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
45.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
45.9k
    const int8_t *filter        = hf;
496
45.9k
    const int shift             = denom + 14 - BIT_DEPTH;
497
45.9k
#if BIT_DEPTH < 14
498
45.9k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
45.9k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
297k
    for (int y = 0; y < height; y++) {
505
2.46M
        for (int x = 0; x < width; x++) {
506
2.20M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
2.20M
        }
508
251k
        dst += dst_stride;
509
251k
        src += src_stride;
510
251k
    }
511
45.9k
}
dsp.c:put_uni_chroma_w_h_12
Line
Count
Source
490
59.2k
{
491
59.2k
    const pixel *src            = (const pixel *)_src;
492
59.2k
    pixel *dst                  = (pixel *)_dst;
493
59.2k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
59.2k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
59.2k
    const int8_t *filter        = hf;
496
59.2k
    const int shift             = denom + 14 - BIT_DEPTH;
497
59.2k
#if BIT_DEPTH < 14
498
59.2k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
59.2k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
546k
    for (int y = 0; y < height; y++) {
505
7.63M
        for (int x = 0; x < width; x++) {
506
7.14M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
7.14M
        }
508
487k
        dst += dst_stride;
509
487k
        src += src_stride;
510
487k
    }
511
59.2k
}
dsp.c:put_uni_chroma_w_h_10
Line
Count
Source
490
22.4k
{
491
22.4k
    const pixel *src            = (const pixel *)_src;
492
22.4k
    pixel *dst                  = (pixel *)_dst;
493
22.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
22.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
22.4k
    const int8_t *filter        = hf;
496
22.4k
    const int shift             = denom + 14 - BIT_DEPTH;
497
22.4k
#if BIT_DEPTH < 14
498
22.4k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
22.4k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
256k
    for (int y = 0; y < height; y++) {
505
2.81M
        for (int x = 0; x < width; x++) {
506
2.57M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
2.57M
        }
508
233k
        dst += dst_stride;
509
233k
        src += src_stride;
510
233k
    }
511
22.4k
}
dsp.c:put_uni_chroma_w_h_8
Line
Count
Source
490
45.9k
{
491
45.9k
    const pixel *src            = (const pixel *)_src;
492
45.9k
    pixel *dst                  = (pixel *)_dst;
493
45.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
494
45.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
495
45.9k
    const int8_t *filter        = hf;
496
45.9k
    const int shift             = denom + 14 - BIT_DEPTH;
497
45.9k
#if BIT_DEPTH < 14
498
45.9k
    const int offset            = 1 << (shift - 1);
499
#else
500
    const int offset            = 0;
501
#endif
502
503
45.9k
    ox     = ox * (1 << (BIT_DEPTH - 8));
504
297k
    for (int y = 0; y < height; y++) {
505
2.46M
        for (int x = 0; x < width; x++) {
506
2.20M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
507
2.20M
        }
508
251k
        dst += dst_stride;
509
251k
        src += src_stride;
510
251k
    }
511
45.9k
}
512
513
static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride,
514
    const uint8_t *_src, const ptrdiff_t _src_stride, const int height,
515
    const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf,
516
    const int width)
517
252k
{
518
252k
    const pixel *src            = (const pixel *)_src;
519
252k
    pixel *dst                  = (pixel *)_dst;
520
252k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
252k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
252k
    const int8_t *filter        = vf;
523
252k
    const int shift             = denom + 14 - BIT_DEPTH;
524
252k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
252k
#if BIT_DEPTH < 14
526
252k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
2.32M
    for (int y = 0; y < height; y++) {
532
27.2M
        for (int x = 0; x < width; x++) {
533
25.2M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
25.2M
        }
535
2.07M
        dst += dst_stride;
536
2.07M
        src += src_stride;
537
2.07M
    }
538
252k
}
dsp.c:put_uni_chroma_w_v_9
Line
Count
Source
517
18.9k
{
518
18.9k
    const pixel *src            = (const pixel *)_src;
519
18.9k
    pixel *dst                  = (pixel *)_dst;
520
18.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
18.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
18.9k
    const int8_t *filter        = vf;
523
18.9k
    const int shift             = denom + 14 - BIT_DEPTH;
524
18.9k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
18.9k
#if BIT_DEPTH < 14
526
18.9k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
307k
    for (int y = 0; y < height; y++) {
532
4.69M
        for (int x = 0; x < width; x++) {
533
4.41M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
4.41M
        }
535
288k
        dst += dst_stride;
536
288k
        src += src_stride;
537
288k
    }
538
18.9k
}
dsp.c:put_uni_chroma_w_v_10
Line
Count
Source
517
23.9k
{
518
23.9k
    const pixel *src            = (const pixel *)_src;
519
23.9k
    pixel *dst                  = (pixel *)_dst;
520
23.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
23.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
23.9k
    const int8_t *filter        = vf;
523
23.9k
    const int shift             = denom + 14 - BIT_DEPTH;
524
23.9k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
23.9k
#if BIT_DEPTH < 14
526
23.9k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
257k
    for (int y = 0; y < height; y++) {
532
2.57M
        for (int x = 0; x < width; x++) {
533
2.34M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
2.34M
        }
535
233k
        dst += dst_stride;
536
233k
        src += src_stride;
537
233k
    }
538
23.9k
}
dsp.c:put_uni_chroma_w_v_12
Line
Count
Source
517
40.4k
{
518
40.4k
    const pixel *src            = (const pixel *)_src;
519
40.4k
    pixel *dst                  = (pixel *)_dst;
520
40.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
40.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
40.4k
    const int8_t *filter        = vf;
523
40.4k
    const int shift             = denom + 14 - BIT_DEPTH;
524
40.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
40.4k
#if BIT_DEPTH < 14
526
40.4k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
410k
    for (int y = 0; y < height; y++) {
532
6.04M
        for (int x = 0; x < width; x++) {
533
5.67M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
5.67M
        }
535
369k
        dst += dst_stride;
536
369k
        src += src_stride;
537
369k
    }
538
40.4k
}
dsp.c:put_uni_chroma_w_v_8
Line
Count
Source
517
52.5k
{
518
52.5k
    const pixel *src            = (const pixel *)_src;
519
52.5k
    pixel *dst                  = (pixel *)_dst;
520
52.5k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
52.5k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
52.5k
    const int8_t *filter        = vf;
523
52.5k
    const int shift             = denom + 14 - BIT_DEPTH;
524
52.5k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
52.5k
#if BIT_DEPTH < 14
526
52.5k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
341k
    for (int y = 0; y < height; y++) {
532
2.67M
        for (int x = 0; x < width; x++) {
533
2.38M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
2.38M
        }
535
288k
        dst += dst_stride;
536
288k
        src += src_stride;
537
288k
    }
538
52.5k
}
dsp.c:put_uni_chroma_w_v_12
Line
Count
Source
517
40.4k
{
518
40.4k
    const pixel *src            = (const pixel *)_src;
519
40.4k
    pixel *dst                  = (pixel *)_dst;
520
40.4k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
40.4k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
40.4k
    const int8_t *filter        = vf;
523
40.4k
    const int shift             = denom + 14 - BIT_DEPTH;
524
40.4k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
40.4k
#if BIT_DEPTH < 14
526
40.4k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
410k
    for (int y = 0; y < height; y++) {
532
6.04M
        for (int x = 0; x < width; x++) {
533
5.67M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
5.67M
        }
535
369k
        dst += dst_stride;
536
369k
        src += src_stride;
537
369k
    }
538
40.4k
}
dsp.c:put_uni_chroma_w_v_10
Line
Count
Source
517
23.9k
{
518
23.9k
    const pixel *src            = (const pixel *)_src;
519
23.9k
    pixel *dst                  = (pixel *)_dst;
520
23.9k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
23.9k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
23.9k
    const int8_t *filter        = vf;
523
23.9k
    const int shift             = denom + 14 - BIT_DEPTH;
524
23.9k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
23.9k
#if BIT_DEPTH < 14
526
23.9k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
257k
    for (int y = 0; y < height; y++) {
532
2.57M
        for (int x = 0; x < width; x++) {
533
2.34M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
2.34M
        }
535
233k
        dst += dst_stride;
536
233k
        src += src_stride;
537
233k
    }
538
23.9k
}
dsp.c:put_uni_chroma_w_v_8
Line
Count
Source
517
52.5k
{
518
52.5k
    const pixel *src            = (const pixel *)_src;
519
52.5k
    pixel *dst                  = (pixel *)_dst;
520
52.5k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
521
52.5k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
522
52.5k
    const int8_t *filter        = vf;
523
52.5k
    const int shift             = denom + 14 - BIT_DEPTH;
524
52.5k
    const int ox                = _ox * (1 << (BIT_DEPTH - 8));
525
52.5k
#if BIT_DEPTH < 14
526
52.5k
    int offset                  = 1 << (shift - 1);
527
#else
528
    int offset                  = 0;
529
#endif
530
531
341k
    for (int y = 0; y < height; y++) {
532
2.67M
        for (int x = 0; x < width; x++) {
533
2.38M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
534
2.38M
        }
535
288k
        dst += dst_stride;
536
288k
        src += src_stride;
537
288k
    }
538
52.5k
}
539
540
static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride,
541
     const uint8_t *_src, ptrdiff_t _src_stride,  int height, int denom, int wx, int ox,
542
     const int8_t *hf, const int8_t *vf, int width)
543
952k
{
544
952k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
952k
    int16_t *tmp                = tmp_array;
546
952k
    const pixel *src            = (const pixel *)_src;
547
952k
    pixel *dst                  = (pixel *)_dst;
548
952k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
952k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
952k
    const int8_t *filter        = hf;
551
952k
    const int shift             = denom + 14 - BIT_DEPTH;
552
952k
#if BIT_DEPTH < 14
553
952k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
952k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
11.1M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
111M
        for (int x = 0; x < width; x++)
562
101M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
10.2M
        src += src_stride;
564
10.2M
        tmp += MAX_PB_SIZE;
565
10.2M
    }
566
567
952k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
952k
    filter = vf;
569
570
952k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
8.29M
    for (int y = 0; y < height; y++) {
572
89.2M
        for (int x = 0; x < width; x++)
573
81.9M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
7.34M
        tmp += MAX_PB_SIZE;
575
7.34M
        dst += dst_stride;
576
7.34M
    }
577
952k
}
dsp.c:put_uni_chroma_w_hv_9
Line
Count
Source
543
36.0k
{
544
36.0k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
36.0k
    int16_t *tmp                = tmp_array;
546
36.0k
    const pixel *src            = (const pixel *)_src;
547
36.0k
    pixel *dst                  = (pixel *)_dst;
548
36.0k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
36.0k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
36.0k
    const int8_t *filter        = hf;
551
36.0k
    const int shift             = denom + 14 - BIT_DEPTH;
552
36.0k
#if BIT_DEPTH < 14
553
36.0k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
36.0k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
795k
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
11.6M
        for (int x = 0; x < width; x++)
562
10.8M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
758k
        src += src_stride;
564
758k
        tmp += MAX_PB_SIZE;
565
758k
    }
566
567
36.0k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
36.0k
    filter = vf;
569
570
36.0k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
686k
    for (int y = 0; y < height; y++) {
572
10.3M
        for (int x = 0; x < width; x++)
573
9.65M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
650k
        tmp += MAX_PB_SIZE;
575
650k
        dst += dst_stride;
576
650k
    }
577
36.0k
}
dsp.c:put_uni_chroma_w_hv_10
Line
Count
Source
543
70.8k
{
544
70.8k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
70.8k
    int16_t *tmp                = tmp_array;
546
70.8k
    const pixel *src            = (const pixel *)_src;
547
70.8k
    pixel *dst                  = (pixel *)_dst;
548
70.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
70.8k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
70.8k
    const int8_t *filter        = hf;
551
70.8k
    const int shift             = denom + 14 - BIT_DEPTH;
552
70.8k
#if BIT_DEPTH < 14
553
70.8k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
70.8k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
1.00M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
11.5M
        for (int x = 0; x < width; x++)
562
10.6M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
937k
        src += src_stride;
564
937k
        tmp += MAX_PB_SIZE;
565
937k
    }
566
567
70.8k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
70.8k
    filter = vf;
569
570
70.8k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
796k
    for (int y = 0; y < height; y++) {
572
9.65M
        for (int x = 0; x < width; x++)
573
8.93M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
725k
        tmp += MAX_PB_SIZE;
575
725k
        dst += dst_stride;
576
725k
    }
577
70.8k
}
dsp.c:put_uni_chroma_w_hv_12
Line
Count
Source
543
190k
{
544
190k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
190k
    int16_t *tmp                = tmp_array;
546
190k
    const pixel *src            = (const pixel *)_src;
547
190k
    pixel *dst                  = (pixel *)_dst;
548
190k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
190k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
190k
    const int8_t *filter        = hf;
551
190k
    const int shift             = denom + 14 - BIT_DEPTH;
552
190k
#if BIT_DEPTH < 14
553
190k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
190k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
2.09M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
21.7M
        for (int x = 0; x < width; x++)
562
19.8M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
1.90M
        src += src_stride;
564
1.90M
        tmp += MAX_PB_SIZE;
565
1.90M
    }
566
567
190k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
190k
    filter = vf;
569
570
190k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
1.52M
    for (int y = 0; y < height; y++) {
572
17.3M
        for (int x = 0; x < width; x++)
573
15.9M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
1.33M
        tmp += MAX_PB_SIZE;
575
1.33M
        dst += dst_stride;
576
1.33M
    }
577
190k
}
dsp.c:put_uni_chroma_w_hv_8
Line
Count
Source
543
196k
{
544
196k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
196k
    int16_t *tmp                = tmp_array;
546
196k
    const pixel *src            = (const pixel *)_src;
547
196k
    pixel *dst                  = (pixel *)_dst;
548
196k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
196k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
196k
    const int8_t *filter        = hf;
551
196k
    const int shift             = denom + 14 - BIT_DEPTH;
552
196k
#if BIT_DEPTH < 14
553
196k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
196k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
2.07M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
16.8M
        for (int x = 0; x < width; x++)
562
14.9M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
1.87M
        src += src_stride;
564
1.87M
        tmp += MAX_PB_SIZE;
565
1.87M
    }
566
567
196k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
196k
    filter = vf;
569
570
196k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
1.48M
    for (int y = 0; y < height; y++) {
572
12.4M
        for (int x = 0; x < width; x++)
573
11.2M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
1.28M
        tmp += MAX_PB_SIZE;
575
1.28M
        dst += dst_stride;
576
1.28M
    }
577
196k
}
dsp.c:put_uni_chroma_w_hv_12
Line
Count
Source
543
190k
{
544
190k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
190k
    int16_t *tmp                = tmp_array;
546
190k
    const pixel *src            = (const pixel *)_src;
547
190k
    pixel *dst                  = (pixel *)_dst;
548
190k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
190k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
190k
    const int8_t *filter        = hf;
551
190k
    const int shift             = denom + 14 - BIT_DEPTH;
552
190k
#if BIT_DEPTH < 14
553
190k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
190k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
2.09M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
21.7M
        for (int x = 0; x < width; x++)
562
19.8M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
1.90M
        src += src_stride;
564
1.90M
        tmp += MAX_PB_SIZE;
565
1.90M
    }
566
567
190k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
190k
    filter = vf;
569
570
190k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
1.52M
    for (int y = 0; y < height; y++) {
572
17.3M
        for (int x = 0; x < width; x++)
573
15.9M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
1.33M
        tmp += MAX_PB_SIZE;
575
1.33M
        dst += dst_stride;
576
1.33M
    }
577
190k
}
dsp.c:put_uni_chroma_w_hv_10
Line
Count
Source
543
70.8k
{
544
70.8k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
70.8k
    int16_t *tmp                = tmp_array;
546
70.8k
    const pixel *src            = (const pixel *)_src;
547
70.8k
    pixel *dst                  = (pixel *)_dst;
548
70.8k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
70.8k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
70.8k
    const int8_t *filter        = hf;
551
70.8k
    const int shift             = denom + 14 - BIT_DEPTH;
552
70.8k
#if BIT_DEPTH < 14
553
70.8k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
70.8k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
1.00M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
11.5M
        for (int x = 0; x < width; x++)
562
10.6M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
937k
        src += src_stride;
564
937k
        tmp += MAX_PB_SIZE;
565
937k
    }
566
567
70.8k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
70.8k
    filter = vf;
569
570
70.8k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
796k
    for (int y = 0; y < height; y++) {
572
9.65M
        for (int x = 0; x < width; x++)
573
8.93M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
725k
        tmp += MAX_PB_SIZE;
575
725k
        dst += dst_stride;
576
725k
    }
577
70.8k
}
dsp.c:put_uni_chroma_w_hv_8
Line
Count
Source
543
196k
{
544
196k
    int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE];
545
196k
    int16_t *tmp                = tmp_array;
546
196k
    const pixel *src            = (const pixel *)_src;
547
196k
    pixel *dst                  = (pixel *)_dst;
548
196k
    const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
549
196k
    const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
550
196k
    const int8_t *filter        = hf;
551
196k
    const int shift             = denom + 14 - BIT_DEPTH;
552
196k
#if BIT_DEPTH < 14
553
196k
    const int offset            = 1 << (shift - 1);
554
#else
555
    const int offset            = 0;
556
#endif
557
558
196k
    src -= CHROMA_EXTRA_BEFORE * src_stride;
559
560
2.07M
    for (int y = 0; y < height + CHROMA_EXTRA; y++) {
561
16.8M
        for (int x = 0; x < width; x++)
562
14.9M
            tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8);
563
1.87M
        src += src_stride;
564
1.87M
        tmp += MAX_PB_SIZE;
565
1.87M
    }
566
567
196k
    tmp    = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE;
568
196k
    filter = vf;
569
570
196k
    ox     = ox * (1 << (BIT_DEPTH - 8));
571
1.48M
    for (int y = 0; y < height; y++) {
572
12.4M
        for (int x = 0; x < width; x++)
573
11.2M
            dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
574
1.28M
        tmp += MAX_PB_SIZE;
575
1.28M
        dst += dst_stride;
576
1.28M
    }
577
196k
}