Coverage Report

Created: 2022-08-24 06:17

/src/x265/source/common/pixel.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Mandar Gurav <mandar@multicorewareinc.com>
6
 *          Mahesh Pittala <mahesh@multicorewareinc.com>
7
 *          Min Chen <min.chen@multicorewareinc.com>
8
 *          Hongbin Liu<liuhongbin1@huawei.com>
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23
 *
24
 * This program is also available under a commercial proprietary license.
25
 * For more information, contact us at license @ x265.com.
26
 *****************************************************************************/
27
28
#include "common.h"
29
#include "slicetype.h"      // LOWRES_COST_MASK
30
#include "primitives.h"
31
#include "x265.h"
32
33
#include <cstdlib> // abs()
34
35
using namespace X265_NS;
36
37
namespace {
38
// place functions in anonymous namespace (file static)
39
40
template<int lx, int ly>
41
int sad(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
42
32.1M
{
43
32.1M
    int sum = 0;
44
45
203M
    for (int y = 0; y < ly; y++)
46
171M
    {
47
1.19G
        for (int x = 0; x < lx; x++)
48
1.02G
            sum += abs(pix1[x] - pix2[x]);
49
50
171M
        pix1 += stride_pix1;
51
171M
        pix2 += stride_pix2;
52
171M
    }
53
54
32.1M
    return sum;
55
32.1M
}
pixel.cpp:int (anonymous namespace)::sad<4, 4>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
42
21.4M
{
43
21.4M
    int sum = 0;
44
45
107M
    for (int y = 0; y < ly; y++)
46
85.8M
    {
47
429M
        for (int x = 0; x < lx; x++)
48
343M
            sum += abs(pix1[x] - pix2[x]);
49
50
85.8M
        pix1 += stride_pix1;
51
85.8M
        pix2 += stride_pix2;
52
85.8M
    }
53
54
21.4M
    return sum;
55
21.4M
}
pixel.cpp:int (anonymous namespace)::sad<8, 8>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
42
10.6M
{
43
10.6M
    int sum = 0;
44
45
95.8M
    for (int y = 0; y < ly; y++)
46
85.2M
    {
47
766M
        for (int x = 0; x < lx; x++)
48
681M
            sum += abs(pix1[x] - pix2[x]);
49
50
85.2M
        pix1 += stride_pix1;
51
85.2M
        pix2 += stride_pix2;
52
85.2M
    }
53
54
10.6M
    return sum;
55
10.6M
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<12, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<24, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<48, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 64>(unsigned char const*, long, unsigned char const*, long)
56
57
// Sum of absolute differences between two lx-wide, ly-tall blocks of int16_t
// values. stride_pix1/stride_pix2 are the element distances between the
// starts of consecutive rows in each buffer.
template<int lx, int ly>
int sad(const int16_t* pix1, intptr_t stride_pix1, const int16_t* pix2, intptr_t stride_pix2)
{
    int total = 0;

    for (int row = 0; row < ly; ++row, pix1 += stride_pix1, pix2 += stride_pix2)
    {
        for (int col = 0; col < lx; ++col)
        {
            const int diff = pix1[col] - pix2[col];
            total += abs(diff);
        }
    }

    return total;
}
73
74
template<int lx, int ly>
75
void sad_x3(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, intptr_t frefstride, int32_t* res)
76
0
{
77
0
    res[0] = 0;
78
0
    res[1] = 0;
79
0
    res[2] = 0;
80
0
    for (int y = 0; y < ly; y++)
81
0
    {
82
0
        for (int x = 0; x < lx; x++)
83
0
        {
84
0
            res[0] += abs(pix1[x] - pix2[x]);
85
0
            res[1] += abs(pix1[x] - pix3[x]);
86
0
            res[2] += abs(pix1[x] - pix4[x]);
87
0
        }
88
89
0
        pix1 += FENC_STRIDE;
90
0
        pix2 += frefstride;
91
0
        pix3 += frefstride;
92
0
        pix4 += frefstride;
93
0
    }
94
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
95
96
template<int lx, int ly>
97
void sad_x4(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, const pixel* pix5, intptr_t frefstride, int32_t* res)
98
0
{
99
0
    res[0] = 0;
100
0
    res[1] = 0;
101
0
    res[2] = 0;
102
0
    res[3] = 0;
103
0
    for (int y = 0; y < ly; y++)
104
0
    {
105
0
        for (int x = 0; x < lx; x++)
106
0
        {
107
0
            res[0] += abs(pix1[x] - pix2[x]);
108
0
            res[1] += abs(pix1[x] - pix3[x]);
109
0
            res[2] += abs(pix1[x] - pix4[x]);
110
0
            res[3] += abs(pix1[x] - pix5[x]);
111
0
        }
112
113
0
        pix1 += FENC_STRIDE;
114
0
        pix2 += frefstride;
115
0
        pix3 += frefstride;
116
0
        pix4 += frefstride;
117
0
        pix5 += frefstride;
118
0
    }
119
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
120
121
// Scans `width` candidate positions and collects those whose absolute
// difference of sums (plus MV cost) is below `thresh`.
//
// For candidate i the score is
//     |encDC[0] - sums[i]| + |encDC[1] - sums[i + lx/2]|
//   + |encDC[2] - sums[i + delta]| + |encDC[3] - sums[i + delta + lx/2]|
//   + costMvX[i]
// Every i with score < thresh is appended to mvs. Returns the number of
// entries written to mvs. The template parameter ly is not used here.
template<int lx, int ly>
int ads_x4(int encDC[4], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    // The counter is a plain int: an int16_t counter compared against an int
    // width wraps to a negative value once width exceeds INT16_MAX, which
    // indexes costMvX out of bounds and never terminates.
    for (int i = 0; i < width; i++, sums++)
    {
        int ads = abs(encDC[0] - long(sums[0]))
            + abs(encDC[1] - long(sums[lx >> 1]))
            + abs(encDC[2] - long(sums[delta]))
            + abs(encDC[3] - long(sums[delta + (lx >> 1)]))
            + costMvX[i];
        if (ads < thresh)
            mvs[nmv++] = (int16_t)i; // mvs entries are 16-bit; NOTE(review): callers presumably keep width within int16_t range
    }
    return nmv;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<16, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 24>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<24, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<8, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 48>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<48, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<16, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
137
138
// Scans `width` candidate positions and collects those whose absolute
// difference of sums (plus MV cost) is below `thresh`.
//
// For candidate i the score is
//     |encDC[0] - sums[i]| + |encDC[1] - sums[i + delta]| + costMvX[i]
// Every i with score < thresh is appended to mvs. Returns the number of
// entries written to mvs. The template parameters lx and ly are not used in
// the body.
template<int lx, int ly>
int ads_x2(int encDC[2], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    // The counter is a plain int: an int16_t counter compared against an int
    // width wraps to a negative value once width exceeds INT16_MAX, which
    // indexes costMvX out of bounds and never terminates.
    for (int i = 0; i < width; i++, sums++)
    {
        int ads = abs(encDC[0] - long(sums[0]))
            + abs(encDC[1] - long(sums[delta]))
            + costMvX[i];
        if (ads < thresh)
            mvs[nmv++] = (int16_t)i; // mvs entries are 16-bit; NOTE(review): callers presumably keep width within int16_t range
    }
    return nmv;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<8, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<4, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<16, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<8, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<32, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<16, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<64, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<32, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
152
153
// Scans `width` candidate positions and collects those whose absolute
// difference of sums (plus MV cost) is below `thresh`.
//
// For candidate i the score is |encDC[0] - sums[i]| + costMvX[i].
// Every i with score < thresh is appended to mvs. Returns the number of
// entries written to mvs. The third (unnamed) parameter and the template
// parameters lx and ly are not used in the body.
template<int lx, int ly>
int ads_x1(int encDC[1], uint32_t *sums, int, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    // The counter is a plain int: an int16_t counter compared against an int
    // width wraps to a negative value once width exceeds INT16_MAX, which
    // indexes costMvX out of bounds and never terminates.
    for (int i = 0; i < width; i++, sums++)
    {
        int ads = abs(encDC[0] - long(sums[0]))
            + costMvX[i];
        if (ads < thresh)
            mvs[nmv++] = (int16_t)i; // mvs entries are 16-bit; NOTE(review): callers presumably keep width within int16_t range
    }
    return nmv;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<4, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<8, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<16, 12>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<12, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<16, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<4, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
166
167
template<int lx, int ly, class T1, class T2>
168
sse_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
169
13.8M
{
170
13.8M
    sse_t sum = 0;
171
13.8M
    int tmp;
172
173
86.5M
    for (int y = 0; y < ly; y++)
174
72.7M
    {
175
586M
        for (int x = 0; x < lx; x++)
176
514M
        {
177
514M
            tmp = pix1[x] - pix2[x];
178
514M
            sum += (tmp * tmp);
179
514M
        }
180
181
72.7M
        pix1 += stride_pix1;
182
72.7M
        pix2 += stride_pix2;
183
72.7M
    }
184
185
13.8M
    return sum;
186
13.8M
}
pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
169
10.7M
{
170
10.7M
    sse_t sum = 0;
171
10.7M
    int tmp;
172
173
53.8M
    for (int y = 0; y < ly; y++)
174
43.0M
    {
175
215M
        for (int x = 0; x < lx; x++)
176
172M
        {
177
172M
            tmp = pix1[x] - pix2[x];
178
172M
            sum += (tmp * tmp);
179
172M
        }
180
181
43.0M
        pix1 += stride_pix1;
182
43.0M
        pix2 += stride_pix2;
183
43.0M
    }
184
185
10.7M
    return sum;
186
10.7M
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, short, short>(short const*, long, short const*, long)
pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
169
2.50M
{
170
2.50M
    sse_t sum = 0;
171
2.50M
    int tmp;
172
173
22.5M
    for (int y = 0; y < ly; y++)
174
20.0M
    {
175
180M
        for (int x = 0; x < lx; x++)
176
160M
        {
177
160M
            tmp = pix1[x] - pix2[x];
178
160M
            sum += (tmp * tmp);
179
160M
        }
180
181
20.0M
        pix1 += stride_pix1;
182
20.0M
        pix2 += stride_pix2;
183
20.0M
    }
184
185
2.50M
    return sum;
186
2.50M
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, short, short>(short const*, long, short const*, long)
pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
169
491k
{
170
491k
    sse_t sum = 0;
171
491k
    int tmp;
172
173
8.34M
    for (int y = 0; y < ly; y++)
174
7.85M
    {
175
133M
        for (int x = 0; x < lx; x++)
176
125M
        {
177
125M
            tmp = pix1[x] - pix2[x];
178
125M
            sum += (tmp * tmp);
179
125M
        }
180
181
7.85M
        pix1 += stride_pix1;
182
7.85M
        pix2 += stride_pix2;
183
7.85M
    }
184
185
491k
    return sum;
186
491k
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, short, short>(short const*, long, short const*, long)
pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
169
54.7k
{
170
54.7k
    sse_t sum = 0;
171
54.7k
    int tmp;
172
173
1.80M
    for (int y = 0; y < ly; y++)
174
1.75M
    {
175
57.7M
        for (int x = 0; x < lx; x++)
176
55.9M
        {
177
55.9M
            tmp = pix1[x] - pix2[x];
178
55.9M
            sum += (tmp * tmp);
179
55.9M
        }
180
181
1.75M
        pix1 += stride_pix1;
182
1.75M
        pix2 += stride_pix2;
183
1.75M
    }
184
185
54.7k
    return sum;
186
54.7k
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 2, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
187
188
6.04G
// Number of bits in one sum_t; used as the shift distance between the two
// values packed into a single sum2_t.
#define BITS_PER_SUM (8 * sizeof(sum_t))

// 4-point butterfly on sum2_t values:
//   d0 = s0 + s1 + s2 + s3
//   d1 = s0 - s1 + s2 - s3
//   d2 = s0 + s1 - s2 - s3
//   d3 = s0 - s1 - s2 + s3
// All four inputs are read into temporaries before any output is written, so
// an output may name the same variable as an input. The source arguments are
// parenthesized so that expression arguments expand with the intended
// precedence.
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \
        sum2_t t0 = (s0) + (s1); \
        sum2_t t1 = (s0) - (s1); \
        sum2_t t2 = (s2) + (s3); \
        sum2_t t3 = (s2) - (s3); \
        d0 = t0 + t2; \
        d2 = t0 - t2; \
        d1 = t1 + t3; \
        d3 = t1 - t3; \
}
200
201
// in: a pseudo-simd number of the form x+(y<<16)
202
// return: abs(x)+(abs(y)<<16)
203
inline sum2_t abs2(sum2_t a)
204
1.90G
{
205
1.90G
    sum2_t s = ((a >> (BITS_PER_SUM - 1)) & (((sum2_t)1 << BITS_PER_SUM) + 1)) * ((sum_t)-1);
206
207
1.90G
    return (a + s) ^ s;
208
1.90G
}
209
210
static int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
211
67.3M
{
212
67.3M
    sum2_t tmp[4][2];
213
67.3M
    sum2_t a0, a1, a2, a3, b0, b1;
214
67.3M
    sum2_t sum = 0;
215
216
336M
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
217
269M
    {
218
269M
        a0 = pix1[0] - pix2[0];
219
269M
        a1 = pix1[1] - pix2[1];
220
269M
        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
221
269M
        a2 = pix1[2] - pix2[2];
222
269M
        a3 = pix1[3] - pix2[3];
223
269M
        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
224
269M
        tmp[i][0] = b0 + b1;
225
269M
        tmp[i][1] = b0 - b1;
226
269M
    }
227
228
203M
    for (int i = 0; i < 2; i++)
229
136M
    {
230
136M
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
231
136M
        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
232
136M
        sum += ((sum_t)a0) + (a0 >> BITS_PER_SUM);
233
136M
    }
234
235
67.3M
    return (int)(sum >> 1);
236
67.3M
}
237
238
// x264's SWAR version of satd 8x4, performs two 4x4 SATDs at once
239
static int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
240
2.10M
{
241
2.10M
    sum2_t tmp[4][4];
242
2.10M
    sum2_t a0, a1, a2, a3;
243
2.10M
    sum2_t sum = 0;
244
245
10.5M
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
246
8.41M
    {
247
8.41M
        a0 = (pix1[0] - pix2[0]) + ((sum2_t)(pix1[4] - pix2[4]) << BITS_PER_SUM);
248
8.41M
        a1 = (pix1[1] - pix2[1]) + ((sum2_t)(pix1[5] - pix2[5]) << BITS_PER_SUM);
249
8.41M
        a2 = (pix1[2] - pix2[2]) + ((sum2_t)(pix1[6] - pix2[6]) << BITS_PER_SUM);
250
8.41M
        a3 = (pix1[3] - pix2[3]) + ((sum2_t)(pix1[7] - pix2[7]) << BITS_PER_SUM);
251
8.41M
        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
252
8.41M
    }
253
254
10.5M
    for (int i = 0; i < 4; i++)
255
8.41M
    {
256
8.41M
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
257
8.41M
        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
258
8.41M
    }
259
260
2.10M
    return (((sum_t)sum) + (sum >> BITS_PER_SUM)) >> 1;
261
2.10M
}
262
263
template<int w, int h>
264
// calculate satd in blocks of 4x4
265
int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
266
0
{
267
0
    int satd = 0;
268
269
#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
270
    pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon;
271
#endif
272
273
0
    for (int row = 0; row < h; row += 4)
274
0
        for (int col = 0; col < w; col += 4)
275
0
            satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1,
276
0
                             pix2 + row * stride_pix2 + col, stride_pix2);
277
278
0
    return satd;
279
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 32>(unsigned char const*, long, unsigned char const*, long)
280
281
template<int w, int h>
282
// calculate satd in blocks of 8x4
283
int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
284
1.05M
{
285
1.05M
    int satd = 0;
286
287
#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
288
    pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
289
#endif
290
291
3.15M
    for (int row = 0; row < h; row += 4)
292
4.20M
        for (int col = 0; col < w; col += 8)
293
2.10M
            satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
294
2.10M
                             pix2 + row * stride_pix2 + col, stride_pix2);
295
296
1.05M
    return satd;
297
1.05M
}
pixel.cpp:int (anonymous namespace)::satd8<8, 8>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
284
1.05M
{
285
1.05M
    int satd = 0;
286
287
#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
288
    pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
289
#endif
290
291
3.15M
    for (int row = 0; row < h; row += 4)
292
4.20M
        for (int col = 0; col < w; col += 8)
293
2.10M
            satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
294
2.10M
                             pix2 + row * stride_pix2 + col, stride_pix2);
295
296
1.05M
    return satd;
297
1.05M
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<48, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 64>(unsigned char const*, long, unsigned char const*, long)
298
299
inline int _sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
300
42.7M
{
301
42.7M
    sum2_t tmp[8][4];
302
42.7M
    sum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
303
42.7M
    sum2_t sum = 0;
304
305
381M
    for (int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2)
306
339M
    {
307
339M
        a0 = pix1[0] - pix2[0];
308
339M
        a1 = pix1[1] - pix2[1];
309
339M
        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
310
339M
        a2 = pix1[2] - pix2[2];
311
339M
        a3 = pix1[3] - pix2[3];
312
339M
        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
313
339M
        a4 = pix1[4] - pix2[4];
314
339M
        a5 = pix1[5] - pix2[5];
315
339M
        b2 = (a4 + a5) + ((a4 - a5) << BITS_PER_SUM);
316
339M
        a6 = pix1[6] - pix2[6];
317
339M
        a7 = pix1[7] - pix2[7];
318
339M
        b3 = (a6 + a7) + ((a6 - a7) << BITS_PER_SUM);
319
339M
        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0, b1, b2, b3);
320
339M
    }
321
322
214M
    for (int i = 0; i < 4; i++)
323
171M
    {
324
171M
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
325
171M
        HADAMARD4(a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i]);
326
171M
        b0  = abs2(a0 + a4) + abs2(a0 - a4);
327
171M
        b0 += abs2(a1 + a5) + abs2(a1 - a5);
328
171M
        b0 += abs2(a2 + a6) + abs2(a2 - a6);
329
171M
        b0 += abs2(a3 + a7) + abs2(a3 - a7);
330
171M
        sum += (sum_t)b0 + (b0 >> BITS_PER_SUM);
331
171M
    }
332
333
42.7M
    return (int)sum;
334
42.7M
}
335
336
inline int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
337
22.3M
{
338
22.3M
    return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
339
22.3M
}
340
341
static int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
342
5.27M
{
343
5.27M
    int sum = _sa8d_8x8(pix1, i_pix1, pix2, i_pix2)
344
5.27M
        + _sa8d_8x8(pix1 + 8, i_pix1, pix2 + 8, i_pix2)
345
5.27M
        + _sa8d_8x8(pix1 + 8 * i_pix1, i_pix1, pix2 + 8 * i_pix2, i_pix2)
346
5.27M
        + _sa8d_8x8(pix1 + 8 + 8 * i_pix1, i_pix1, pix2 + 8 + 8 * i_pix2, i_pix2);
347
348
    // This matches x264 sa8d_16x16, but is slightly different from HM's behavior because
349
    // this version only rounds once at the end
350
5.27M
    return (sum + 2) >> 2;
351
5.27M
}
352
353
template<int w, int h>
354
// Calculate sa8d in blocks of 8x8
355
int sa8d8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
356
0
{
357
0
    int cost = 0;
358
359
0
    for (int y = 0; y < h; y += 8)
360
0
        for (int x = 0; x < w; x += 8)
361
0
            cost += sa8d_8x8(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2);
362
363
0
    return cost;
364
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 16>(unsigned char const*, long, unsigned char const*, long)
365
366
template<int w, int h>
367
// Calculate sa8d in blocks of 16x16
368
int sa8d16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
369
606k
{
370
606k
    int cost = 0;
371
372
1.81M
    for (int y = 0; y < h; y += 16)
373
3.63M
        for (int x = 0; x < w; x += 16)
374
2.42M
            cost += sa8d_16x16(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2);
375
376
606k
    return cost;
377
606k
}
pixel.cpp:int (anonymous namespace)::sa8d16<32, 32>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
369
606k
{
370
606k
    int cost = 0;
371
372
1.81M
    for (int y = 0; y < h; y += 16)
373
3.63M
        for (int x = 0; x < w; x += 16)
374
2.42M
            cost += sa8d_16x16(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2);
375
376
606k
    return cost;
377
606k
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<32, 64>(unsigned char const*, long, unsigned char const*, long)
378
379
template<int size>
380
sse_t pixel_ssd_s_c(const int16_t* a, intptr_t dstride)
381
0
{
382
0
    sse_t sum = 0;
383
0
    for (int y = 0; y < size; y++)
384
0
    {
385
0
        for (int x = 0; x < size; x++)
386
0
            sum += a[x] * a[x];
387
388
0
        a += dstride;
389
0
    }
390
0
    return sum;
391
0
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<4>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<8>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<16>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<32>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<64>(short const*, long)
392
393
template<int size>
394
void blockfill_s_c(int16_t* dst, intptr_t dstride, int16_t val)
395
33.0k
{
396
397k
    for (int y = 0; y < size; y++)
397
6.37M
        for (int x = 0; x < size; x++)
398
6.01M
            dst[y * dstride + x] = val;
399
33.0k
}
pixel.cpp:void (anonymous namespace)::blockfill_s_c<4>(short*, long, short)
Line
Count
Source
395
10.2k
{
396
51.0k
    for (int y = 0; y < size; y++)
397
204k
        for (int x = 0; x < size; x++)
398
163k
            dst[y * dstride + x] = val;
399
10.2k
}
pixel.cpp:void (anonymous namespace)::blockfill_s_c<8>(short*, long, short)
Line
Count
Source
395
10.4k
{
396
93.8k
    for (int y = 0; y < size; y++)
397
751k
        for (int x = 0; x < size; x++)
398
667k
            dst[y * dstride + x] = val;
399
10.4k
}
pixel.cpp:void (anonymous namespace)::blockfill_s_c<16>(short*, long, short)
Line
Count
Source
395
9.79k
{
396
166k
    for (int y = 0; y < size; y++)
397
2.66M
        for (int x = 0; x < size; x++)
398
2.50M
            dst[y * dstride + x] = val;
399
9.79k
}
pixel.cpp:void (anonymous namespace)::blockfill_s_c<32>(short*, long, short)
Line
Count
Source
395
2.61k
{
396
86.1k
    for (int y = 0; y < size; y++)
397
2.75M
        for (int x = 0; x < size; x++)
398
2.67M
            dst[y * dstride + x] = val;
399
2.61k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<64>(short*, long, short)
400
401
template<int size>
402
void cpy2Dto1D_shl(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift)
403
0
{
404
0
    X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n");
405
0
    X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n");
406
0
    X265_CHECK(shift >= 0, "invalid shift\n");
407
408
0
    for (int i = 0; i < size; i++)
409
0
    {
410
0
        for (int j = 0; j < size; j++)
411
0
            dst[j] = src[j] << shift;
412
413
0
        src += srcStride;
414
0
        dst += size;
415
0
    }
416
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<64>(short*, short const*, long, int)
417
418
template<int size>
419
void cpy2Dto1D_shr(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift)
420
0
{
421
0
    X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n");
422
0
    X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n");
423
0
    X265_CHECK(shift > 0, "invalid shift\n");
424
425
0
    int16_t round = 1 << (shift - 1);
426
0
    for (int i = 0; i < size; i++)
427
0
    {
428
0
        for (int j = 0; j < size; j++)
429
0
            dst[j] = (src[j] + round) >> shift;
430
431
0
        src += srcStride;
432
0
        dst += size;
433
0
    }
434
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<64>(short*, short const*, long, int)
435
436
template<int size>
437
void cpy1Dto2D_shl(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift)
438
14.1k
{
439
14.1k
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
440
14.1k
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
441
14.1k
    X265_CHECK(shift >= 0, "invalid shift\n");
442
443
139k
    for (int i = 0; i < size; i++)
444
125k
    {
445
2.16M
        for (int j = 0; j < size; j++)
446
2.04M
            dst[j] = src[j] << shift;
447
448
125k
        src += size;
449
125k
        dst += dstStride;
450
125k
    }
451
14.1k
}
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<4>(short*, short const*, long, int)
Line
Count
Source
438
7.75k
{
439
7.75k
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
440
7.75k
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
441
7.75k
    X265_CHECK(shift >= 0, "invalid shift\n");
442
443
38.7k
    for (int i = 0; i < size; i++)
444
31.0k
    {
445
155k
        for (int j = 0; j < size; j++)
446
124k
            dst[j] = src[j] << shift;
447
448
31.0k
        src += size;
449
31.0k
        dst += dstStride;
450
31.0k
    }
451
7.75k
}
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<8>(short*, short const*, long, int)
Line
Count
Source
438
3.49k
{
439
3.49k
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
440
3.49k
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
441
3.49k
    X265_CHECK(shift >= 0, "invalid shift\n");
442
443
31.4k
    for (int i = 0; i < size; i++)
444
27.9k
    {
445
251k
        for (int j = 0; j < size; j++)
446
223k
            dst[j] = src[j] << shift;
447
448
27.9k
        src += size;
449
27.9k
        dst += dstStride;
450
27.9k
    }
451
3.49k
}
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<16>(short*, short const*, long, int)
Line
Count
Source
438
1.69k
{
439
1.69k
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
440
1.69k
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
441
1.69k
    X265_CHECK(shift >= 0, "invalid shift\n");
442
443
28.7k
    for (int i = 0; i < size; i++)
444
27.1k
    {
445
460k
        for (int j = 0; j < size; j++)
446
433k
            dst[j] = src[j] << shift;
447
448
27.1k
        src += size;
449
27.1k
        dst += dstStride;
450
27.1k
    }
451
1.69k
}
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<32>(short*, short const*, long, int)
Line
Count
Source
438
1.23k
{
439
1.23k
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
440
1.23k
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
441
1.23k
    X265_CHECK(shift >= 0, "invalid shift\n");
442
443
40.6k
    for (int i = 0; i < size; i++)
444
39.4k
    {
445
1.30M
        for (int j = 0; j < size; j++)
446
1.26M
            dst[j] = src[j] << shift;
447
448
39.4k
        src += size;
449
39.4k
        dst += dstStride;
450
39.4k
    }
451
1.23k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<64>(short*, short const*, long, int)
452
453
template<int size>
454
void cpy1Dto2D_shr(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift)
455
0
{
456
0
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
457
0
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
458
0
    X265_CHECK(shift > 0, "invalid shift\n");
459
460
0
    int16_t round = 1 << (shift - 1);
461
0
    for (int i = 0; i < size; i++)
462
0
    {
463
0
        for (int j = 0; j < size; j++)
464
0
            dst[j] = (src[j] + round) >> shift;
465
466
0
        src += size;
467
0
        dst += dstStride;
468
0
    }
469
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<64>(short*, short const*, long, int)
470
471
template<int blockSize>
472
void getResidual(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride)
473
13.0M
{
474
78.5M
    for (int y = 0; y < blockSize; y++)
475
65.4M
    {
476
497M
        for (int x = 0; x < blockSize; x++)
477
432M
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
478
479
65.4M
        fenc += stride;
480
65.4M
        residual += stride;
481
65.4M
        pred += stride;
482
65.4M
    }
483
13.0M
}
pixel.cpp:void (anonymous namespace)::getResidual<4>(unsigned char const*, unsigned char const*, short*, long)
Line
Count
Source
473
10.7M
{
474
53.8M
    for (int y = 0; y < blockSize; y++)
475
43.1M
    {
476
215M
        for (int x = 0; x < blockSize; x++)
477
172M
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
478
479
43.1M
        fenc += stride;
480
43.1M
        residual += stride;
481
43.1M
        pred += stride;
482
43.1M
    }
483
10.7M
}
pixel.cpp:void (anonymous namespace)::getResidual<8>(unsigned char const*, unsigned char const*, short*, long)
Line
Count
Source
473
1.82M
{
474
16.4M
    for (int y = 0; y < blockSize; y++)
475
14.6M
    {
476
131M
        for (int x = 0; x < blockSize; x++)
477
116M
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
478
479
14.6M
        fenc += stride;
480
14.6M
        residual += stride;
481
14.6M
        pred += stride;
482
14.6M
    }
483
1.82M
}
pixel.cpp:void (anonymous namespace)::getResidual<16>(unsigned char const*, unsigned char const*, short*, long)
Line
Count
Source
473
409k
{
474
6.95M
    for (int y = 0; y < blockSize; y++)
475
6.54M
    {
476
111M
        for (int x = 0; x < blockSize; x++)
477
104M
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
478
479
6.54M
        fenc += stride;
480
6.54M
        residual += stride;
481
6.54M
        pred += stride;
482
6.54M
    }
483
409k
}
pixel.cpp:void (anonymous namespace)::getResidual<32>(unsigned char const*, unsigned char const*, short*, long)
Line
Count
Source
473
37.4k
{
474
1.23M
    for (int y = 0; y < blockSize; y++)
475
1.19M
    {
476
39.5M
        for (int x = 0; x < blockSize; x++)
477
38.3M
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
478
479
1.19M
        fenc += stride;
480
1.19M
        residual += stride;
481
1.19M
        pred += stride;
482
1.19M
    }
483
37.4k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<64>(unsigned char const*, unsigned char const*, short*, long)
484
485
template<int blockSize>
486
void transpose(pixel* dst, const pixel* src, intptr_t stride)
487
0
{
488
0
    for (int k = 0; k < blockSize; k++)
489
0
        for (int l = 0; l < blockSize; l++)
490
0
            dst[k * blockSize + l] = src[l * stride + k];
491
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<4>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<8>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<16>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<32>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<64>(unsigned char*, unsigned char const*, long)
492
493
static void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
494
0
{
495
0
    int x, y;
496
497
#if CHECKED_BUILD || _DEBUG
498
    const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
499
    X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
500
    X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
501
    X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n");
502
#endif
503
504
0
    for (y = 0; y <= height - 1; y++)
505
0
    {
506
0
        for (x = 0; x <= width - 1; )
507
0
        {
508
            // note: width can be odd
509
0
            dst[x] = x265_clip(((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
510
0
            x++;
511
0
        }
512
513
0
        src += srcStride;
514
0
        dst += dstStride;
515
0
    }
516
0
}
517
518
static void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
519
0
{
520
0
    int x, y;
521
522
0
    const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
523
524
0
    X265_CHECK(!(width & 15), "weightp alignment error\n");
525
0
    X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
526
0
    X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
527
0
    X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n");
528
0
    X265_CHECK(!(round & ((1 << correction) - 1)), "round must be include factor correction, please update ASM ABI\n");
529
530
0
    for (y = 0; y <= height - 1; y++)
531
0
    {
532
0
        for (x = 0; x <= width - 1; )
533
0
        {
534
            // simulating pixel to short conversion
535
0
            int16_t val = src[x] << correction;
536
0
            dst[x] = x265_clip(((w0 * (val) + round) >> shift) + offset);
537
0
            x++;
538
0
        }
539
540
0
        src += stride;
541
0
        dst += stride;
542
0
    }
543
0
}
544
545
template<int lx, int ly>
546
void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
547
0
{
548
0
    for (int y = 0; y < ly; y++)
549
0
    {
550
0
        for (int x = 0; x < lx; x++)
551
0
            dst[x] = (src0[x] + src1[x] + 1) >> 1;
552
553
0
        src0 += sstride0;
554
0
        src1 += sstride1;
555
0
        dst += dstride;
556
0
    }
557
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 12>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<12, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 24>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<24, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 48>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<48, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
558
559
static void scale1D_128to64(pixel *dst, const pixel *src)
560
0
{
561
0
    int x;
562
0
    const pixel* src1 = src;
563
0
    const pixel* src2 = src + 128;
564
565
0
    pixel* dst1 = dst;
566
0
    pixel* dst2 = dst + 64/*128*/;
567
568
0
    for (x = 0; x < 128; x += 2)
569
0
    {
570
        // Top pixel
571
0
        pixel pix0 = src1[(x + 0)];
572
0
        pixel pix1 = src1[(x + 1)];
573
574
        // Left pixel
575
0
        pixel pix2 = src2[(x + 0)];
576
0
        pixel pix3 = src2[(x + 1)];
577
0
        int sum1 = pix0 + pix1;
578
0
        int sum2 = pix2 + pix3;
579
580
0
        dst1[x >> 1] = (pixel)((sum1 + 1) >> 1);
581
0
        dst2[x >> 1] = (pixel)((sum2 + 1) >> 1);
582
0
    }
583
0
}
584
585
static void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
586
0
{
587
0
    uint32_t x, y;
588
589
0
    for (y = 0; y < 64; y += 2)
590
0
    {
591
0
        for (x = 0; x < 64; x += 2)
592
0
        {
593
0
            pixel pix0 = src[(y + 0) * stride + (x + 0)];
594
0
            pixel pix1 = src[(y + 0) * stride + (x + 1)];
595
0
            pixel pix2 = src[(y + 1) * stride + (x + 0)];
596
0
            pixel pix3 = src[(y + 1) * stride + (x + 1)];
597
0
            int sum = pix0 + pix1 + pix2 + pix3;
598
599
0
            dst[y / 2 * 32 + x / 2] = (pixel)((sum + 2) >> 2);
600
0
        }
601
0
    }
602
0
}
603
604
static
605
void frame_init_lowres_core(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc,
606
                            intptr_t src_stride, intptr_t dst_stride, int width, int height)
607
698
{
608
61.1k
    for (int y = 0; y < height; y++)
609
60.4k
    {
610
60.4k
        const pixel* src1 = src0 + src_stride;
611
60.4k
        const pixel* src2 = src1 + src_stride;
612
5.67M
        for (int x = 0; x < width; x++)
613
5.61M
        {
614
            // slower than naive bilinear, but matches asm
615
22.4M
#define FILTER(a, b, c, d) ((((a + b + 1) >> 1) + ((c + d + 1) >> 1) + 1) >> 1)
616
5.61M
            dst0[x] = FILTER(src0[2 * x], src1[2 * x], src0[2 * x + 1], src1[2 * x + 1]);
617
5.61M
            dsth[x] = FILTER(src0[2 * x + 1], src1[2 * x + 1], src0[2 * x + 2], src1[2 * x + 2]);
618
5.61M
            dstv[x] = FILTER(src1[2 * x], src2[2 * x], src1[2 * x + 1], src2[2 * x + 1]);
619
5.61M
            dstc[x] = FILTER(src1[2 * x + 1], src2[2 * x + 1], src1[2 * x + 2], src2[2 * x + 2]);
620
5.61M
#undef FILTER
621
5.61M
        }
622
60.4k
        src0 += src_stride * 2;
623
60.4k
        dst0 += dst_stride;
624
60.4k
        dsth += dst_stride;
625
60.4k
        dstv += dst_stride;
626
60.4k
        dstc += dst_stride;
627
60.4k
    }
628
698
}
629
630
/* structural similarity metric */
631
static void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4])
632
0
{
633
0
    for (int z = 0; z < 2; z++)
634
0
    {
635
0
        uint32_t s1 = 0, s2 = 0, ss = 0, s12 = 0;
636
0
        for (int y = 0; y < 4; y++)
637
0
        {
638
0
            for (int x = 0; x < 4; x++)
639
0
            {
640
0
                int a = pix1[x + y * stride1];
641
0
                int b = pix2[x + y * stride2];
642
0
                s1 += a;
643
0
                s2 += b;
644
0
                ss += a * a;
645
0
                ss += b * b;
646
0
                s12 += a * b;
647
0
            }
648
0
        }
649
650
0
        sums[z][0] = s1;
651
0
        sums[z][1] = s2;
652
0
        sums[z][2] = ss;
653
0
        sums[z][3] = s12;
654
0
        pix1 += 4;
655
0
        pix2 += 4;
656
0
    }
657
0
}
658
659
/* Turns one set of accumulated window statistics into an SSIM value.
 *
 *   s1, s2  sums of the samples of the two inputs
 *   ss      combined sum of squares of both inputs
 *   s12     sum of the cross products
 *
 * The intermediate arithmetic is done in `type`: float when HIGH_BIT_DEPTH is
 * set, int otherwise (the comment inside the body explains the overflow
 * reasoning).  The result is
 *   (2*s1*s2 + c1) * (2*covar + c2) / ((s1*s1 + s2*s2 + c1) * (vars + c2))
 * with vars = ss*64 - s1*s1 - s2*s2 and covar = s12*64 - s1*s2.
 *
 * NOTE(review): the trailing `#undef PIXEL_MAX` removes a macro that is not
 * defined inside this function, so code later in the translation unit can no
 * longer use it - confirm this is intended. */
static float ssim_end_1(int s1, int s2, int ss, int s12)
660
0
{
661
/* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
662
 * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
663
 * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
664
665
#if HIGH_BIT_DEPTH
666
    X265_CHECK((X265_DEPTH == 10) || (X265_DEPTH == 12), "ssim invalid depth\n");
667
#define type float
668
    static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64);
669
    static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63);
670
#else
671
0
    X265_CHECK(X265_DEPTH == 8, "ssim invalid depth\n");
672
0
#define type int
673
0
    // integer build: the constants are rounded to nearest by adding .5 before truncation
    static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5);
674
0
    static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5);
675
0
#endif
676
0
    type fs1 = (type)s1;
677
0
    type fs2 = (type)s2;
678
0
    type fss = (type)ss;
679
0
    type fs12 = (type)s12;
680
0
    type vars = (type)(fss * 64 - fs1 * fs1 - fs2 * fs2);
681
0
    type covar = (type)(fs12 * 64 - fs1 * fs2);
682
0
    // each factor is converted to float before the multiply/divide, in both builds
    return (float)(2 * fs1 * fs2 + ssim_c1) * (float)(2 * covar + ssim_c2)
683
0
           / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars + ssim_c2));
684
0
#undef type
685
0
#undef PIXEL_MAX
686
0
}
687
688
/* Returns the sum of ssim_end_1() over `width` consecutive windows.
 *
 * Window i is built from four sets of block sums: entries i and i + 1 of sum0
 * together with entries i and i + 1 of sum1; the four are added element-wise
 * before being passed on.  Because entry i + 1 is read, rows 0..width of both
 * arrays must be valid, i.e. width may be at most 4 for the declared [5][4]
 * shape.  The result is a plain sum, not an average. */
static float ssim_end_4(int sum0[5][4], int sum1[5][4], int width)
689
0
{
690
0
    float ssim = 0.0;
691
692
0
    for (int i = 0; i < width; i++)
693
0
    {
694
0
        ssim += ssim_end_1(sum0[i][0] + sum0[i + 1][0] + sum1[i][0] + sum1[i + 1][0],
695
0
                           sum0[i][1] + sum0[i + 1][1] + sum1[i][1] + sum1[i + 1][1],
696
0
                           sum0[i][2] + sum0[i + 1][2] + sum1[i][2] + sum1[i + 1][2],
697
0
                           sum0[i][3] + sum0[i + 1][3] + sum1[i][3] + sum1[i + 1][3]);
698
0
    }
699
700
0
    return ssim;
701
0
}
702
703
/* Computes the sample sum and the sum of squared samples of a size x size
 * block and returns both packed into one 64-bit value:
 *   bits  0..31  sum of the samples
 *   bits 32..63  sum of the squared samples
 * i_stride is the row pitch of pix, in samples.
 *
 * NOTE(review): both accumulators are 32 bits wide; for the largest block
 * sizes combined with deep samples the square sum could exceed 32 bits and
 * wrap - confirm against the sample depths actually built. */
template<int size>
704
uint64_t pixel_var(const pixel* pix, intptr_t i_stride)
705
181k
{
706
181k
    uint32_t sum = 0, sqr = 0;
707
708
2.11M
    for (int y = 0; y < size; y++)
709
1.93M
    {
710
25.1M
        for (int x = 0; x < size; x++)
711
23.1M
        {
712
23.1M
            sum += pix[x];
713
23.1M
            sqr += pix[x] * pix[x];
714
23.1M
        }
715
716
1.93M
        pix += i_stride; // advance to the next row
717
1.93M
    }
718
719
181k
    return sum + ((uint64_t)sqr << 32); // low word: sum, high word: sum of squares
720
181k
}
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<4>(unsigned char const*, long)
pixel.cpp:unsigned long (anonymous namespace)::pixel_var<8>(unsigned char const*, long)
Line
Count
Source
705
120k
{
706
120k
    uint32_t sum = 0, sqr = 0;
707
708
1.08M
    for (int y = 0; y < size; y++)
709
966k
    {
710
8.69M
        for (int x = 0; x < size; x++)
711
7.72M
        {
712
7.72M
            sum += pix[x];
713
7.72M
            sqr += pix[x] * pix[x];
714
7.72M
        }
715
716
966k
        pix += i_stride;
717
966k
    }
718
719
120k
    return sum + ((uint64_t)sqr << 32);
720
120k
}
pixel.cpp:unsigned long (anonymous namespace)::pixel_var<16>(unsigned char const*, long)
Line
Count
Source
705
60.3k
{
706
60.3k
    uint32_t sum = 0, sqr = 0;
707
708
1.02M
    for (int y = 0; y < size; y++)
709
966k
    {
710
16.4M
        for (int x = 0; x < size; x++)
711
15.4M
        {
712
15.4M
            sum += pix[x];
713
15.4M
            sqr += pix[x] * pix[x];
714
15.4M
        }
715
716
966k
        pix += i_stride;
717
966k
    }
718
719
60.3k
    return sum + ((uint64_t)sqr << 32);
720
60.3k
}
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<32>(unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<64>(unsigned char const*, long)
721
722
#if defined(_MSC_VER)
723
#pragma warning(disable: 4127) // conditional expression is constant
724
#endif
725
726
/* Measures how much the "energy" of a reconstructed block differs from that of
 * the source block.
 *
 * The template argument selects the block dimension:
 *   size == 0  a single 4x4 block, using satd_4x4 and sad<4, 4>
 *   size  > 0  a square block of 1 << (size + 2) samples per side (8, 16,
 *              32, ...), walked in 8x8 tiles using sa8d_8x8 and sad<8, 8>
 * For every tile the energy is the transform cost against an all-zero block
 * minus a quarter of the SAD against the same all-zero block; the absolute
 * source/recon energy differences are accumulated and returned.
 *
 * sstride / rstride are the row pitches of source / recon, in samples. */
template<int size>
727
int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
728
13.8M
{
729
13.8M
    // static storage, so the buffer holds zeros; it is always passed with
    // stride 0, which makes this single row stand in for a whole zero block
    static pixel zeroBuf[8] /* = { 0 } */;
730
731
13.8M
    if (size)
732
3.04M
    {
733
3.04M
        int dim = 1 << (size + 2);
734
3.04M
        uint32_t totEnergy = 0;
735
6.75M
        for (int i = 0; i < dim; i += 8)
736
3.70M
        {
737
9.04M
            for (int j = 0; j < dim; j+= 8)
738
5.34M
            {
739
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
740
5.34M
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
741
5.34M
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
742
5.34M
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
743
5.34M
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
744
745
5.34M
                totEnergy += abs(sourceEnergy - reconEnergy);
746
5.34M
            }
747
3.70M
        }
748
3.04M
        return totEnergy; // unsigned total is converted to the int return type
749
3.04M
    }
750
10.7M
    else
751
10.7M
    {
752
        /* 4x4 is too small for sa8d */
753
10.7M
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
754
10.7M
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
755
10.7M
        return abs(sourceEnergy - reconEnergy);
756
10.7M
    }
757
13.8M
}
pixel.cpp:int (anonymous namespace)::psyCost_pp<0>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
728
10.7M
{
729
10.7M
    static pixel zeroBuf[8] /* = { 0 } */;
730
731
10.7M
    if (size)
732
0
    {
733
0
        int dim = 1 << (size + 2);
734
0
        uint32_t totEnergy = 0;
735
0
        for (int i = 0; i < dim; i += 8)
736
0
        {
737
0
            for (int j = 0; j < dim; j+= 8)
738
0
            {
739
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
740
0
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
741
0
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
742
0
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
743
0
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
744
745
0
                totEnergy += abs(sourceEnergy - reconEnergy);
746
0
            }
747
0
        }
748
0
        return totEnergy;
749
0
    }
750
10.7M
    else
751
10.7M
    {
752
        /* 4x4 is too small for sa8d */
753
10.7M
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
754
10.7M
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
755
10.7M
        return abs(sourceEnergy - reconEnergy);
756
10.7M
    }
757
10.7M
}
pixel.cpp:int (anonymous namespace)::psyCost_pp<1>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
728
2.50M
{
729
2.50M
    static pixel zeroBuf[8] /* = { 0 } */;
730
731
2.50M
    if (size)
732
2.50M
    {
733
2.50M
        int dim = 1 << (size + 2);
734
2.50M
        uint32_t totEnergy = 0;
735
5.00M
        for (int i = 0; i < dim; i += 8)
736
2.50M
        {
737
5.00M
            for (int j = 0; j < dim; j+= 8)
738
2.50M
            {
739
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
740
2.50M
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
741
2.50M
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
742
2.50M
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
743
2.50M
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
744
745
2.50M
                totEnergy += abs(sourceEnergy - reconEnergy);
746
2.50M
            }
747
2.50M
        }
748
2.50M
        return totEnergy;
749
2.50M
    }
750
0
    else
751
0
    {
752
        /* 4x4 is too small for sa8d */
753
0
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
754
0
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
755
0
        return abs(sourceEnergy - reconEnergy);
756
0
    }
757
2.50M
}
pixel.cpp:int (anonymous namespace)::psyCost_pp<2>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
728
491k
{
729
491k
    static pixel zeroBuf[8] /* = { 0 } */;
730
731
491k
    if (size)
732
491k
    {
733
491k
        int dim = 1 << (size + 2);
734
491k
        uint32_t totEnergy = 0;
735
1.47M
        for (int i = 0; i < dim; i += 8)
736
982k
        {
737
2.94M
            for (int j = 0; j < dim; j+= 8)
738
1.96M
            {
739
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
740
1.96M
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
741
1.96M
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
742
1.96M
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
743
1.96M
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
744
745
1.96M
                totEnergy += abs(sourceEnergy - reconEnergy);
746
1.96M
            }
747
982k
        }
748
491k
        return totEnergy;
749
491k
    }
750
0
    else
751
0
    {
752
        /* 4x4 is too small for sa8d */
753
0
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
754
0
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
755
0
        return abs(sourceEnergy - reconEnergy);
756
0
    }
757
491k
}
pixel.cpp:int (anonymous namespace)::psyCost_pp<3>(unsigned char const*, long, unsigned char const*, long)
Line
Count
Source
728
54.7k
{
729
54.7k
    static pixel zeroBuf[8] /* = { 0 } */;
730
731
54.7k
    if (size)
732
54.7k
    {
733
54.7k
        int dim = 1 << (size + 2);
734
54.7k
        uint32_t totEnergy = 0;
735
273k
        for (int i = 0; i < dim; i += 8)
736
219k
        {
737
1.09M
            for (int j = 0; j < dim; j+= 8)
738
876k
            {
739
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
740
876k
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
741
876k
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
742
876k
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
743
876k
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
744
745
876k
                totEnergy += abs(sourceEnergy - reconEnergy);
746
876k
            }
747
219k
        }
748
54.7k
        return totEnergy;
749
54.7k
    }
750
0
    else
751
0
    {
752
        /* 4x4 is too small for sa8d */
753
0
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
754
0
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
755
0
        return abs(sourceEnergy - reconEnergy);
756
0
    }
757
54.7k
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<4>(unsigned char const*, long, unsigned char const*, long)
758
759
/* Copies a bx-wide, by-high block of pixels from b to a.
 * stridea / strideb are the row pitches of a / b, in samples; the two buffers
 * may use different pitches.  The copy is done sample by sample, row by row. */
template<int bx, int by>
760
void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
761
35.2M
{
762
220M
    for (int y = 0; y < by; y++)
763
185M
    {
764
1.54G
        for (int x = 0; x < bx; x++)
765
1.35G
            a[x] = b[x];
766
767
185M
        a += stridea; // next destination row
768
185M
        b += strideb; // next source row
769
185M
    }
770
35.2M
}
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 4>(unsigned char*, long, unsigned char const*, long)
Line
Count
Source
761
27.7M
{
762
138M
    for (int y = 0; y < by; y++)
763
111M
    {
764
555M
        for (int x = 0; x < bx; x++)
765
444M
            a[x] = b[x];
766
767
111M
        a += stridea;
768
111M
        b += strideb;
769
111M
    }
770
27.7M
}
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 8>(unsigned char*, long, unsigned char const*, long)
Line
Count
Source
761
5.94M
{
762
53.4M
    for (int y = 0; y < by; y++)
763
47.4M
    {
764
426M
        for (int x = 0; x < bx; x++)
765
379M
            a[x] = b[x];
766
767
47.4M
        a += stridea;
768
47.4M
        b += strideb;
769
47.4M
    }
770
5.94M
}
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 16>(unsigned char*, long, unsigned char const*, long)
Line
Count
Source
761
1.32M
{
762
22.4M
    for (int y = 0; y < by; y++)
763
21.0M
    {
764
358M
        for (int x = 0; x < bx; x++)
765
336M
            a[x] = b[x];
766
767
21.0M
        a += stridea;
768
21.0M
        b += strideb;
769
21.0M
    }
770
1.32M
}
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 32>(unsigned char*, long, unsigned char const*, long)
Line
Count
Source
761
166k
{
762
5.46M
    for (int y = 0; y < by; y++)
763
5.29M
    {
764
174M
        for (int x = 0; x < bx; x++)
765
169M
            a[x] = b[x];
766
767
5.29M
        a += stridea;
768
5.29M
        b += strideb;
769
5.29M
    }
770
166k
}
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 64>(unsigned char*, long, unsigned char const*, long)
Line
Count
Source
761
6.19k
{
762
402k
    for (int y = 0; y < by; y++)
763
396k
    {
764
25.7M
        for (int x = 0; x < bx; x++)
765
25.3M
            a[x] = b[x];
766
767
396k
        a += stridea;
768
396k
        b += strideb;
769
396k
    }
770
6.19k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 12>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 24>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 48>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<48, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 6>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 12>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 24>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 48>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 64>(unsigned char*, long, unsigned char const*, long)
771
772
/* Copies a bx-wide, by-high block of int16_t values from b to a.
 * stridea / strideb are the row pitches of a / b, in elements. */
template<int bx, int by>
773
void blockcopy_ss_c(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
774
0
{
775
0
    for (int y = 0; y < by; y++)
776
0
    {
777
0
        for (int x = 0; x < bx; x++)
778
0
            a[x] = b[x];
779
780
0
        a += stridea; // next destination row
781
0
        b += strideb; // next source row
782
0
    }
783
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<4, 4>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<8, 8>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<16, 16>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<32, 32>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<64, 64>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<2, 2>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<2, 4>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<4, 8>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<8, 16>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<16, 32>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<32, 64>(short*, long, short const*, long)
784
785
/* Copies a bx-wide, by-high block of int16_t values from b into the pixel
 * buffer a, narrowing each value with a plain cast (no clamping).
 * stridea / strideb are the row pitches of a / b, in elements.
 *
 * Each value is passed through X265_CHECK with the condition
 * 0 <= b[x] <= (1 << X265_DEPTH) - 1 before the cast.  X265_CHECK is defined
 * elsewhere; presumably it only reports in checked builds - verify. */
template<int bx, int by>
786
void blockcopy_sp_c(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
787
0
{
788
0
    for (int y = 0; y < by; y++)
789
0
    {
790
0
        for (int x = 0; x < bx; x++)
791
0
        {
792
0
            X265_CHECK((b[x] >= 0) && (b[x] <= ((1 << X265_DEPTH) - 1)), "blockcopy pixel size fail\n");
793
0
            a[x] = (pixel)b[x];
794
0
        }
795
796
0
        a += stridea; // next destination row
797
0
        b += strideb; // next source row
798
0
    }
799
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 4>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 8>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 16>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 32>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<64, 64>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 2>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 4>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 8>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 16>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 32>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 64>(unsigned char*, long, short const*, long)
800
801
/* Copies a bx-wide, by-high block of pixels from b into the int16_t buffer a,
 * converting each sample with a cast.
 * stridea / strideb are the row pitches of a / b, in elements. */
template<int bx, int by>
802
void blockcopy_ps_c(int16_t* a, intptr_t stridea, const pixel* b, intptr_t strideb)
803
3.68M
{
804
23.8M
    for (int y = 0; y < by; y++)
805
20.2M
    {
806
177M
        for (int x = 0; x < bx; x++)
807
157M
            a[x] = (int16_t)b[x];
808
809
20.2M
        a += stridea; // next destination row
810
20.2M
        b += strideb; // next source row
811
20.2M
    }
812
3.68M
}
pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<4, 4>(short*, long, unsigned char const*, long)
Line
Count
Source
803
2.80M
{
804
14.0M
    for (int y = 0; y < by; y++)
805
11.2M
    {
806
56.0M
        for (int x = 0; x < bx; x++)
807
44.8M
            a[x] = (int16_t)b[x];
808
809
11.2M
        a += stridea;
810
11.2M
        b += strideb;
811
11.2M
    }
812
2.80M
}
pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<8, 8>(short*, long, unsigned char const*, long)
Line
Count
Source
803
696k
{
804
6.26M
    for (int y = 0; y < by; y++)
805
5.56M
    {
806
50.1M
        for (int x = 0; x < bx; x++)
807
44.5M
            a[x] = (int16_t)b[x];
808
809
5.56M
        a += stridea;
810
5.56M
        b += strideb;
811
5.56M
    }
812
696k
}
pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<16, 16>(short*, long, unsigned char const*, long)
Line
Count
Source
803
163k
{
804
2.78M
    for (int y = 0; y < by; y++)
805
2.61M
    {
806
44.5M
        for (int x = 0; x < bx; x++)
807
41.8M
            a[x] = (int16_t)b[x];
808
809
2.61M
        a += stridea;
810
2.61M
        b += strideb;
811
2.61M
    }
812
163k
}
pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<32, 32>(short*, long, unsigned char const*, long)
Line
Count
Source
803
25.2k
{
804
831k
    for (int y = 0; y < by; y++)
805
806k
    {
806
26.6M
        for (int x = 0; x < bx; x++)
807
25.7M
            a[x] = (int16_t)b[x];
808
809
806k
        a += stridea;
810
806k
        b += strideb;
811
806k
    }
812
25.2k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<64, 64>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<2, 2>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<2, 4>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<4, 8>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<8, 16>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<16, 32>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<32, 64>(short*, long, unsigned char const*, long)
813
814
/* Writes the per-sample difference b0 - b1 of two bx-wide, by-high pixel
 * blocks into the int16_t buffer a.
 *   dstride   row pitch of a
 *   sstride0  row pitch of b0
 *   sstride1  row pitch of b1
 * All pitches are in elements. */
template<int bx, int by>
815
void pixel_sub_ps_c(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1)
816
25.2k
{
817
619k
    for (int y = 0; y < by; y++)
818
593k
    {
819
23.0M
        for (int x = 0; x < bx; x++)
820
22.4M
            a[x] = (int16_t)(b0[x] - b1[x]);
821
822
593k
        b0 += sstride0;
823
593k
        b1 += sstride1;
824
593k
        a += dstride;
825
593k
    }
826
25.2k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 4>(short*, long, unsigned char const*, unsigned char const*, long, long)
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 8>(short*, long, unsigned char const*, unsigned char const*, long, long)
Line
Count
Source
816
9.66k
{
817
86.9k
    for (int y = 0; y < by; y++)
818
77.2k
    {
819
695k
        for (int x = 0; x < bx; x++)
820
618k
            a[x] = (int16_t)(b0[x] - b1[x]);
821
822
77.2k
        b0 += sstride0;
823
77.2k
        b1 += sstride1;
824
77.2k
        a += dstride;
825
77.2k
    }
826
9.66k
}
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 16>(short*, long, unsigned char const*, unsigned char const*, long, long)
Line
Count
Source
816
5.45k
{
817
92.7k
    for (int y = 0; y < by; y++)
818
87.2k
    {
819
1.48M
        for (int x = 0; x < bx; x++)
820
1.39M
            a[x] = (int16_t)(b0[x] - b1[x]);
821
822
87.2k
        b0 += sstride0;
823
87.2k
        b1 += sstride1;
824
87.2k
        a += dstride;
825
87.2k
    }
826
5.45k
}
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 32>(short*, long, unsigned char const*, unsigned char const*, long, long)
Line
Count
Source
816
6.86k
{
817
226k
    for (int y = 0; y < by; y++)
818
219k
    {
819
7.24M
        for (int x = 0; x < bx; x++)
820
7.02M
            a[x] = (int16_t)(b0[x] - b1[x]);
821
822
219k
        b0 += sstride0;
823
219k
        b1 += sstride1;
824
219k
        a += dstride;
825
219k
    }
826
6.86k
}
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<64, 64>(short*, long, unsigned char const*, unsigned char const*, long, long)
Line
Count
Source
816
3.27k
{
817
212k
    for (int y = 0; y < by; y++)
818
209k
    {
819
13.6M
        for (int x = 0; x < bx; x++)
820
13.4M
            a[x] = (int16_t)(b0[x] - b1[x]);
821
822
209k
        b0 += sstride0;
823
209k
        b1 += sstride1;
824
209k
        a += dstride;
825
209k
    }
826
3.27k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 2>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 4>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 8>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 16>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 32>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 64>(short*, long, unsigned char const*, unsigned char const*, long, long)
827
828
/* Adds the int16_t block b1 to the pixel block b0, sample by sample, and
 * stores x265_clip() of each sum into the pixel buffer a (bx wide, by high).
 *   dstride   row pitch of a
 *   sstride0  row pitch of b0
 *   sstride1  row pitch of b1
 * All pitches are in elements.  x265_clip is defined elsewhere; presumably it
 * clamps the sum to the valid pixel range - verify. */
template<int bx, int by>
829
void pixel_add_ps_c(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1)
830
53.4k
{
831
568k
    for (int y = 0; y < by; y++)
832
514k
    {
833
8.67M
        for (int x = 0; x < bx; x++)
834
8.15M
            a[x] = x265_clip(b0[x] + b1[x]);
835
836
514k
        b0 += sstride0;
837
514k
        b1 += sstride1;
838
514k
        a += dstride;
839
514k
    }
840
53.4k
}
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 4>(unsigned char*, long, unsigned char const*, short const*, long, long)
Line
Count
Source
830
24.1k
{
831
120k
    for (int y = 0; y < by; y++)
832
96.6k
    {
833
483k
        for (int x = 0; x < bx; x++)
834
386k
            a[x] = x265_clip(b0[x] + b1[x]);
835
836
96.6k
        b0 += sstride0;
837
96.6k
        b1 += sstride1;
838
96.6k
        a += dstride;
839
96.6k
    }
840
24.1k
}
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 8>(unsigned char*, long, unsigned char const*, short const*, long, long)
Line
Count
Source
830
13.9k
{
831
125k
    for (int y = 0; y < by; y++)
832
111k
    {
833
1.00M
        for (int x = 0; x < bx; x++)
834
891k
            a[x] = x265_clip(b0[x] + b1[x]);
835
836
111k
        b0 += sstride0;
837
111k
        b1 += sstride1;
838
111k
        a += dstride;
839
111k
    }
840
13.9k
}
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 16>(unsigned char*, long, unsigned char const*, short const*, long, long)
Line
Count
Source
830
11.4k
{
831
195k
    for (int y = 0; y < by; y++)
832
183k
    {
833
3.12M
        for (int x = 0; x < bx; x++)
834
2.94M
            a[x] = x265_clip(b0[x] + b1[x]);
835
836
183k
        b0 += sstride0;
837
183k
        b1 += sstride1;
838
183k
        a += dstride;
839
183k
    }
840
11.4k
}
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 32>(unsigned char*, long, unsigned char const*, short const*, long, long)
Line
Count
Source
830
3.84k
{
831
126k
    for (int y = 0; y < by; y++)
832
122k
    {
833
4.05M
        for (int x = 0; x < bx; x++)
834
3.93M
            a[x] = x265_clip(b0[x] + b1[x]);
835
836
122k
        b0 += sstride0;
837
122k
        b1 += sstride1;
838
122k
        a += dstride;
839
122k
    }
840
3.84k
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<64, 64>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 2>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 4>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 8>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 16>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 32>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 64>(unsigned char*, long, unsigned char const*, short const*, long, long)
841
842
/* Combines two bx-wide, by-high int16_t blocks into one pixel block: each
 * output sample is x265_clip((src0 + src1 + offset) >> shiftNum), where
 *   shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH
 *   offset   = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS
 * src0Stride / src1Stride / dstStride are the row pitches of src0 / src1 /
 * dst, in elements.  IF_INTERNAL_PREC, IF_INTERNAL_OFFS and x265_clip are
 * defined elsewhere.
 *
 * NOTE(review): the inner loop handles two columns per iteration, so an odd bx
 * would touch one column past the block; the instantiations listed in this
 * report all use even widths - confirm no odd width is ever instantiated. */
template<int bx, int by>
843
void addAvg(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
844
0
{
845
0
    int shiftNum, offset;
846
847
0
    shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
848
0
    offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
849
850
0
    for (int y = 0; y < by; y++)
851
0
    {
852
0
        for (int x = 0; x < bx; x += 2)
853
0
        {
854
0
            dst[x + 0] = x265_clip((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
855
0
            dst[x + 1] = x265_clip((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
856
0
        }
857
858
0
        src0 += src0Stride;
859
0
        src1 += src1Stride;
860
0
        dst  += dstStride;
861
0
    }
862
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 12>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 24>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 48>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<48, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 6>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 12>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 24>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 48>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 64>(short const*, short const*, unsigned char*, long, long, long)
863
864
static void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
865
0
{
866
0
    for (int r = 0; r < height; r++)
867
0
    {
868
0
        for (int c = 0; c < width; c++)
869
0
            dst[c] = ((pixel)src[c]) << shift;
870
871
0
        dst += dstStride;
872
0
        src += srcStride;
873
0
    }
874
0
}
875
876
static void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
877
0
{
878
0
    for (int r = 0; r < height; r++)
879
0
    {
880
0
        for (int c = 0; c < width; c++)
881
0
            dst[c] = (pixel)((src[c] >> shift) & mask);
882
883
0
        dst += dstStride;
884
0
        src += srcStride;
885
0
    }
886
0
}
887
888
static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
889
0
{
890
0
    for (int r = 0; r < height; r++)
891
0
    {
892
0
        for (int c = 0; c < width; c++)
893
0
            dst[c] = (pixel)((src[c] >> shift));
894
895
0
        dst += dstStride;
896
0
        src += srcStride;
897
0
    }
898
0
}
899
900
static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
901
0
{
902
0
    for (int r = 0; r < height; r++)
903
0
    {
904
0
        for (int c = 0; c < width; c++)
905
0
            dst[c] = (pixel)((src[c] << shift) & mask);
906
907
0
        dst += dstStride;
908
0
        src += srcStride;
909
0
    }
910
0
}
911
912
/* Estimate the total amount of influence on future quality that could be had if we
913
 * were to improve the reference samples used to inter predict any given CU. */
914
static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
915
                                    const int32_t* invQscales, const double* fpsFactor, int len)
916
0
{
917
0
    double fps = *fpsFactor / 256;  // range[0.01, 1.00]
918
0
    for (int i = 0; i < len; i++)
919
0
    {
920
0
        int intraCost = intraCosts[i];
921
0
        int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
922
0
        double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
923
0
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
924
0
        double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
925
926
#if 0
927
        // algorithm that output match to asm
928
        float intraRcp = (float)1.0f / intraCost;   // VC can't mapping this into RCPPS
929
        float intraRcpError1 = (float)intraCost * (float)intraRcp;
930
        intraRcpError1 *= (float)intraRcp;
931
        float intraRcpError2 = intraRcp + intraRcp;
932
        float propagateDenom = intraRcpError2 - intraRcpError1;
933
        dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
934
#else
935
0
        double propagateDenom = (double)intraCost;             // Q32
936
0
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
937
0
#endif
938
0
        }
939
    //}
940
0
}
941
942
/* Pack doubles into Q8.8 fixed point for storage: each value is scaled by
 * 256, truncated to a signed 16-bit integer and stored as the matching
 * uint16_t bit pattern. No byte swapping is done here. */
static void cuTreeFix8Pack(uint16_t *dst, double *src, int count)
{
    const double *in = src;
    uint16_t *out = dst;

    for (int remaining = count; remaining > 0; remaining--)
    {
        const int16_t fixed = (int16_t)(*in++ * 256.0);
        *out++ = (uint16_t)fixed;
    }
}
948
949
/* Unpack Q8.8 fixed point values into doubles: each stored uint16_t is
 * reinterpreted as a signed 16-bit integer and divided by 256. */
static void cuTreeFix8Unpack(double *dst, uint16_t *src, int count)
{
    int i = 0;
    while (i < count)
    {
        const int16_t qpFix8 = (int16_t)src[i];
        dst[i] = qpFix8 / 256.0;
        i++;
    }
}
957
958
template<int log2TrSize>
959
static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
960
0
{
961
0
    *ssBlock = 0;
962
0
    int trSize = 1 << log2TrSize;
963
0
    for (int y = 0; y < trSize; y++)
964
0
    {
965
0
        for (int x = 0; x < trSize; x++)
966
0
        {
967
0
            int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // copy of residual coeff
968
0
            *ssBlock += temp * temp;
969
0
        }
970
0
    }
971
972
0
    *ac_k = 0;
973
0
    for (int block_yy = 0; block_yy < trSize; block_yy += 1)
974
0
    {
975
0
        for (int block_xx = 0; block_xx < trSize; block_xx += 1)
976
0
        {
977
0
            uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift;
978
0
            *ac_k += temp * temp;
979
0
        }
980
0
    }
981
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<2>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<3>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<4>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<5>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<6>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
982
983
/* Store in *z_k the sum of squares of all samples of a blockSize x blockSize
 * block, each right-shifted by 'shift' first. The block is read with a row
 * stride equal to blockSize. */
static void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k)
{
    *z_k = 0;

    uint32_t row = 0;
    while (row < blockSize)
    {
        uint32_t col = 0;
        while (col < blockSize)
        {
            const uint32_t sample = src[row * blockSize + col] >> shift;
            *z_k += sample * sample;
            col += 1;
        }
        row += 1;
    }
}
995
996
#if HIGH_BIT_DEPTH
997
/* Clip every sample of a width x height plane to [minPix, maxPix] in place.
 * Returns the largest clipped sample and writes the sum of all clipped
 * samples to *outsum. 'stride' is the row stride in elements. */
static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum,
                               const pixel minPix, const pixel maxPix)
{
    pixel peak = 0;
    uint64_t total = 0;

    for (int row = 0; row < height; row++, src += stride)
    {
        for (int col = 0; col < width; col++)
        {
            /* Clip luma of source picture to max and min */
            const pixel clipped = x265_clip3((pixel)minPix, (pixel)maxPix, src[col]);
            src[col] = clipped;
            peak = X265_MAX(clipped, peak);
            total += clipped;
        }
    }

    *outsum = total;
    return peak;
}
1017
1018
#endif
1019
}  // end anonymous namespace
1020
1021
namespace X265_NS {
1022
// x265 private namespace
1023
1024
/* Extend the edges of a picture so that it may safely be used for motion
1025
 * compensation. This function assumes the picture is stored in a buffer with
1026
 * sufficient padding for the X and Y margins */
1027
void extendPicBorder(pixel* pic, intptr_t stride, int width, int height, int marginX, int marginY)
1028
2.79k
{
1029
    /* extend left and right margins */
1030
2.79k
    primitives.extendRowBorder(pic, stride, width, height, marginX);
1031
1032
    /* copy top row to create above margin */
1033
2.79k
    pixel* top = pic - marginX;
1034
189k
    for (int y = 0; y < marginY; y++)
1035
186k
        memcpy(top - (y + 1) * stride, top, stride * sizeof(pixel));
1036
1037
    /* copy bottom row to create below margin */
1038
2.79k
    pixel* bot = pic - marginX + (height - 1) * stride;
1039
189k
    for (int y = 0; y < marginY; y++)
1040
186k
        memcpy(bot + (y + 1) * stride, bot, stride * sizeof(pixel));
1041
2.79k
}
1042
1043
/* Initialize entries for pixel functions defined in this file */
1044
void setupPixelPrimitives_c(EncoderPrimitives &p)
1045
1
{
1046
1
#define LUMA_PU(W, H) \
1047
25
    p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1048
25
    p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg<W, H>; \
1049
25
    p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg<W, H>; \
1050
25
    p.pu[LUMA_ ## W ## x ## H].sad = sad<W, H>; \
1051
25
    p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3<W, H>; \
1052
25
    p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4<W, H>; \
1053
25
    p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp<W, H>; \
1054
25
    p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[ALIGNED] = pixelavg_pp<W, H>;
1055
1
#define LUMA_CU(W, H) \
1056
5
    p.cu[BLOCK_ ## W ## x ## H].sub_ps        = pixel_sub_ps_c<W, H>; \
1057
5
    p.cu[BLOCK_ ## W ## x ## H].add_ps[NONALIGNED]    = pixel_add_ps_c<W, H>; \
1058
5
    p.cu[BLOCK_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>; \
1059
5
    p.cu[BLOCK_ ## W ## x ## H].copy_sp       = blockcopy_sp_c<W, H>; \
1060
5
    p.cu[BLOCK_ ## W ## x ## H].copy_ps       = blockcopy_ps_c<W, H>; \
1061
5
    p.cu[BLOCK_ ## W ## x ## H].copy_ss       = blockcopy_ss_c<W, H>; \
1062
5
    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[NONALIGNED] = blockfill_s_c<W>;  \
1063
5
    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[ALIGNED]    = blockfill_s_c<W>;  \
1064
5
    p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shl = cpy2Dto1D_shl<W>; \
1065
5
    p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shr = cpy2Dto1D_shr<W>; \
1066
5
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[NONALIGNED] = cpy1Dto2D_shl<W>; \
1067
5
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[ALIGNED] = cpy1Dto2D_shl<W>; \
1068
5
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shr = cpy1Dto2D_shr<W>; \
1069
5
    p.cu[BLOCK_ ## W ## x ## H].psy_cost_pp   = psyCost_pp<BLOCK_ ## W ## x ## H>; \
1070
5
    p.cu[BLOCK_ ## W ## x ## H].transpose     = transpose<W>; \
1071
5
    p.cu[BLOCK_ ## W ## x ## H].ssd_s[NONALIGNED]         = pixel_ssd_s_c<W>; \
1072
5
    p.cu[BLOCK_ ## W ## x ## H].ssd_s[ALIGNED] = pixel_ssd_s_c<W>; \
1073
5
    p.cu[BLOCK_ ## W ## x ## H].var           = pixel_var<W>; \
1074
5
    p.cu[BLOCK_ ## W ## x ## H].calcresidual[NONALIGNED]  = getResidual<W>; \
1075
5
    p.cu[BLOCK_ ## W ## x ## H].calcresidual[ALIGNED]     = getResidual<W>; \
1076
5
    p.cu[BLOCK_ ## W ## x ## H].sse_pp        = sse<W, H, pixel, pixel>; \
1077
5
    p.cu[BLOCK_ ## W ## x ## H].sse_ss        = sse<W, H, int16_t, int16_t>;
1078
1079
1
    LUMA_PU(4, 4);
1080
1
    LUMA_PU(8, 8);
1081
1
    LUMA_PU(16, 16);
1082
1
    LUMA_PU(32, 32);
1083
1
    LUMA_PU(64, 64);
1084
1
    LUMA_PU(4, 8);
1085
1
    LUMA_PU(8, 4);
1086
1
    LUMA_PU(16,  8);
1087
1
    LUMA_PU(8, 16);
1088
1
    LUMA_PU(16, 12);
1089
1
    LUMA_PU(12, 16);
1090
1
    LUMA_PU(16,  4);
1091
1
    LUMA_PU(4, 16);
1092
1
    LUMA_PU(32, 16);
1093
1
    LUMA_PU(16, 32);
1094
1
    LUMA_PU(32, 24);
1095
1
    LUMA_PU(24, 32);
1096
1
    LUMA_PU(32,  8);
1097
1
    LUMA_PU(8, 32);
1098
1
    LUMA_PU(64, 32);
1099
1
    LUMA_PU(32, 64);
1100
1
    LUMA_PU(64, 48);
1101
1
    LUMA_PU(48, 64);
1102
1
    LUMA_PU(64, 16);
1103
1
    LUMA_PU(16, 64);
1104
1105
1
    p.pu[LUMA_4x4].ads = ads_x1<4, 4>;
1106
1
    p.pu[LUMA_8x8].ads = ads_x1<8, 8>;
1107
1
    p.pu[LUMA_8x4].ads = ads_x2<8, 4>;
1108
1
    p.pu[LUMA_4x8].ads = ads_x2<4, 8>;
1109
1
    p.pu[LUMA_16x16].ads = ads_x4<16, 16>;
1110
1
    p.pu[LUMA_16x8].ads = ads_x2<16, 8>;
1111
1
    p.pu[LUMA_8x16].ads = ads_x2<8, 16>;
1112
1
    p.pu[LUMA_16x12].ads = ads_x1<16, 12>;
1113
1
    p.pu[LUMA_12x16].ads = ads_x1<12, 16>;
1114
1
    p.pu[LUMA_16x4].ads = ads_x1<16, 4>;
1115
1
    p.pu[LUMA_4x16].ads = ads_x1<4, 16>;
1116
1
    p.pu[LUMA_32x32].ads = ads_x4<32, 32>;
1117
1
    p.pu[LUMA_32x16].ads = ads_x2<32, 16>;
1118
1
    p.pu[LUMA_16x32].ads = ads_x2<16, 32>;
1119
1
    p.pu[LUMA_32x24].ads = ads_x4<32, 24>;
1120
1
    p.pu[LUMA_24x32].ads = ads_x4<24, 32>;
1121
1
    p.pu[LUMA_32x8].ads = ads_x4<32, 8>;
1122
1
    p.pu[LUMA_8x32].ads = ads_x4<8, 32>;
1123
1
    p.pu[LUMA_64x64].ads = ads_x4<64, 64>;
1124
1
    p.pu[LUMA_64x32].ads = ads_x2<64, 32>;
1125
1
    p.pu[LUMA_32x64].ads = ads_x2<32, 64>;
1126
1
    p.pu[LUMA_64x48].ads = ads_x4<64, 48>;
1127
1
    p.pu[LUMA_48x64].ads = ads_x4<48, 64>;
1128
1
    p.pu[LUMA_64x16].ads = ads_x4<64, 16>;
1129
1
    p.pu[LUMA_16x64].ads = ads_x4<16, 64>;
1130
1131
1
    p.pu[LUMA_4x4].satd   = satd_4x4;
1132
1
    p.pu[LUMA_8x8].satd   = satd8<8, 8>;
1133
1
    p.pu[LUMA_8x4].satd   = satd_8x4;
1134
1
    p.pu[LUMA_4x8].satd   = satd4<4, 8>;
1135
1
    p.pu[LUMA_16x16].satd = satd8<16, 16>;
1136
1
    p.pu[LUMA_16x8].satd  = satd8<16, 8>;
1137
1
    p.pu[LUMA_8x16].satd  = satd8<8, 16>;
1138
1
    p.pu[LUMA_16x12].satd = satd8<16, 12>;
1139
1
    p.pu[LUMA_12x16].satd = satd4<12, 16>;
1140
1
    p.pu[LUMA_16x4].satd  = satd8<16, 4>;
1141
1
    p.pu[LUMA_4x16].satd  = satd4<4, 16>;
1142
1
    p.pu[LUMA_32x32].satd = satd8<32, 32>;
1143
1
    p.pu[LUMA_32x16].satd = satd8<32, 16>;
1144
1
    p.pu[LUMA_16x32].satd = satd8<16, 32>;
1145
1
    p.pu[LUMA_32x24].satd = satd8<32, 24>;
1146
1
    p.pu[LUMA_24x32].satd = satd8<24, 32>;
1147
1
    p.pu[LUMA_32x8].satd  = satd8<32, 8>;
1148
1
    p.pu[LUMA_8x32].satd  = satd8<8, 32>;
1149
1
    p.pu[LUMA_64x64].satd = satd8<64, 64>;
1150
1
    p.pu[LUMA_64x32].satd = satd8<64, 32>;
1151
1
    p.pu[LUMA_32x64].satd = satd8<32, 64>;
1152
1
    p.pu[LUMA_64x48].satd = satd8<64, 48>;
1153
1
    p.pu[LUMA_48x64].satd = satd8<48, 64>;
1154
1
    p.pu[LUMA_64x16].satd = satd8<64, 16>;
1155
1
    p.pu[LUMA_16x64].satd = satd8<16, 64>;
1156
1157
1
    LUMA_CU(4, 4);
1158
1
    LUMA_CU(8, 8);
1159
1
    LUMA_CU(16, 16);
1160
1
    LUMA_CU(32, 32);
1161
1
    LUMA_CU(64, 64);
1162
1163
1
    p.cu[BLOCK_4x4].sa8d   = satd_4x4;
1164
1
    p.cu[BLOCK_8x8].sa8d   = sa8d_8x8;
1165
1
    p.cu[BLOCK_16x16].sa8d = sa8d_16x16;
1166
1
    p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>;
1167
1
    p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>;
1168
1169
1
#define CHROMA_PU_420(W, H) \
1170
25
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg<W, H>;         \
1171
25
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg<W, H>;         \
1172
25
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1173
1
1174
1
    CHROMA_PU_420(2, 2);
1175
1
    CHROMA_PU_420(2, 4);
1176
1
    CHROMA_PU_420(4, 4);
1177
1
    CHROMA_PU_420(8, 8);
1178
1
    CHROMA_PU_420(16, 16);
1179
1
    CHROMA_PU_420(32, 32);
1180
1
    CHROMA_PU_420(4, 2);
1181
1
    CHROMA_PU_420(8, 4);
1182
1
    CHROMA_PU_420(4, 8);
1183
1
    CHROMA_PU_420(8, 6);
1184
1
    CHROMA_PU_420(6, 8);
1185
1
    CHROMA_PU_420(8, 2);
1186
1
    CHROMA_PU_420(2, 8);
1187
1
    CHROMA_PU_420(16, 8);
1188
1
    CHROMA_PU_420(8,  16);
1189
1
    CHROMA_PU_420(16, 12);
1190
1
    CHROMA_PU_420(12, 16);
1191
1
    CHROMA_PU_420(16, 4);
1192
1
    CHROMA_PU_420(4,  16);
1193
1
    CHROMA_PU_420(32, 16);
1194
1
    CHROMA_PU_420(16, 32);
1195
1
    CHROMA_PU_420(32, 24);
1196
1
    CHROMA_PU_420(24, 32);
1197
1
    CHROMA_PU_420(32, 8);
1198
1
    CHROMA_PU_420(8,  32);
1199
1200
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x2].satd   = NULL;
1201
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd   = satd_4x4;
1202
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd   = satd8<8, 8>;
1203
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = satd8<16, 16>;
1204
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = satd8<32, 32>;
1205
1206
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].satd   = NULL;
1207
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].satd   = NULL;
1208
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd   = satd_8x4;
1209
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd   = satd4<4, 8>;
1210
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd  = satd8<16, 8>;
1211
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd  = satd8<8, 16>;
1212
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = satd8<32, 16>;
1213
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = satd8<16, 32>;
1214
1215
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].satd   = NULL;
1216
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].satd   = NULL;
1217
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].satd   = NULL;
1218
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].satd   = NULL;
1219
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = satd4<16, 12>;
1220
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = satd4<12, 16>;
1221
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd  = satd4<16, 4>;
1222
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd  = satd4<4, 16>;
1223
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = satd8<32, 24>;
1224
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = satd8<24, 32>;
1225
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd  = satd8<32, 8>;
1226
1
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd  = satd8<8, 32>;
1227
1228
1
#define CHROMA_CU_420(W, H) \
1229
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sse_pp  = sse<W, H, pixel, pixel>; \
1230
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
1231
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
1232
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \
1233
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>;  \
1234
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \
1235
5
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>;
1236
1237
1
    CHROMA_CU_420(2, 2)
1238
1
    CHROMA_CU_420(4, 4)
1239
1
    CHROMA_CU_420(8, 8)
1240
1
    CHROMA_CU_420(16, 16)
1241
1
    CHROMA_CU_420(32, 32)
1242
1243
1
    p.chroma[X265_CSP_I420].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd;
1244
1
    p.chroma[X265_CSP_I420].cu[BLOCK_16x16].sa8d = sa8d8<8, 8>;
1245
1
    p.chroma[X265_CSP_I420].cu[BLOCK_32x32].sa8d = sa8d16<16, 16>;
1246
1
    p.chroma[X265_CSP_I420].cu[BLOCK_64x64].sa8d = sa8d16<32, 32>;
1247
1248
1
#define CHROMA_PU_422(W, H) \
1249
25
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg<W, H>;         \
1250
25
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg<W, H>;         \
1251
25
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1252
1
1253
1
    CHROMA_PU_422(2, 4);
1254
1
    CHROMA_PU_422(4, 8);
1255
1
    CHROMA_PU_422(8, 16);
1256
1
    CHROMA_PU_422(16, 32);
1257
1
    CHROMA_PU_422(32, 64);
1258
1
    CHROMA_PU_422(4, 4);
1259
1
    CHROMA_PU_422(2, 8);
1260
1
    CHROMA_PU_422(8, 8);
1261
1
    CHROMA_PU_422(4, 16);
1262
1
    CHROMA_PU_422(8, 12);
1263
1
    CHROMA_PU_422(6, 16);
1264
1
    CHROMA_PU_422(8, 4);
1265
1
    CHROMA_PU_422(2, 16);
1266
1
    CHROMA_PU_422(16, 16);
1267
1
    CHROMA_PU_422(8, 32);
1268
1
    CHROMA_PU_422(16, 24);
1269
1
    CHROMA_PU_422(12, 32);
1270
1
    CHROMA_PU_422(16, 8);
1271
1
    CHROMA_PU_422(4,  32);
1272
1
    CHROMA_PU_422(32, 32);
1273
1
    CHROMA_PU_422(16, 64);
1274
1
    CHROMA_PU_422(32, 48);
1275
1
    CHROMA_PU_422(24, 64);
1276
1
    CHROMA_PU_422(32, 16);
1277
1
    CHROMA_PU_422(8,  64);
1278
1279
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x4].satd   = NULL;
1280
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd   = satd4<4, 8>;
1281
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].satd  = satd8<8, 16>;
1282
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].satd = satd8<16, 32>;
1283
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = satd8<32, 64>;
1284
1285
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd   = satd_4x4;
1286
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].satd   = NULL;
1287
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd   = satd8<8, 8>;
1288
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd  = satd4<4, 16>;
1289
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].satd = satd8<16, 16>;
1290
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].satd  = satd8<8, 32>;
1291
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = satd8<32, 32>;
1292
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].satd = satd8<16, 64>;
1293
1294
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd  = satd4<8, 12>;
1295
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].satd  = NULL;
1296
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd   = satd4<8, 4>;
1297
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].satd  = NULL;
1298
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].satd = satd8<16, 24>;
1299
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = satd4<12, 32>;
1300
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].satd  = satd8<16, 8>;
1301
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd  = satd4<4, 32>;
1302
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].satd = satd8<32, 48>;
1303
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = satd8<24, 64>;
1304
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = satd8<32, 16>;
1305
1
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd  = satd8<8, 64>;
1306
1307
1
#define CHROMA_CU_422(W, H) \
1308
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sse_pp  = sse<W, H, pixel, pixel>; \
1309
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
1310
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
1311
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \
1312
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
1313
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \
1314
5
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>;
1315
1316
1
    CHROMA_CU_422(2, 4)
1317
1
    CHROMA_CU_422(4, 8)
1318
1
    CHROMA_CU_422(8, 16)
1319
1
    CHROMA_CU_422(16, 32)
1320
1
    CHROMA_CU_422(32, 64)
1321
1322
1
    p.chroma[X265_CSP_I422].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd;
1323
1
    p.chroma[X265_CSP_I422].cu[BLOCK_16x16].sa8d = sa8d8<8, 16>;
1324
1
    p.chroma[X265_CSP_I422].cu[BLOCK_32x32].sa8d = sa8d16<16, 32>;
1325
1
    p.chroma[X265_CSP_I422].cu[BLOCK_64x64].sa8d = sa8d16<32, 64>;
1326
1327
1
    p.weight_pp = weight_pp_c;
1328
1
    p.weight_sp = weight_sp_c;
1329
1330
1
    p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64;
1331
1
    p.scale2D_64to32 = scale2D_64to32;
1332
1
    p.frameInitLowres = frame_init_lowres_core;
1333
1
    p.frameInitLowerRes = frame_init_lowres_core;
1334
1
    p.ssim_4x4x2_core = ssim_4x4x2_core;
1335
1
    p.ssim_end_4 = ssim_end_4;
1336
1337
1
    p.planecopy_cp = planecopy_cp_c;
1338
1
    p.planecopy_sp = planecopy_sp_c;
1339
1
    p.planecopy_sp_shl = planecopy_sp_shl_c;
1340
1
    p.planecopy_pp_shr = planecopy_pp_shr_c;
1341
#if HIGH_BIT_DEPTH
1342
    p.planeClipAndMax = planeClipAndMax_c;
1343
#endif
1344
1
    p.propagateCost = estimateCUPropagateCost;
1345
1
    p.fix8Unpack = cuTreeFix8Unpack;
1346
1
    p.fix8Pack = cuTreeFix8Pack;
1347
1348
1
    p.cu[BLOCK_4x4].ssimDist = ssimDist_c<2>;
1349
1
    p.cu[BLOCK_8x8].ssimDist = ssimDist_c<3>;
1350
1
    p.cu[BLOCK_16x16].ssimDist = ssimDist_c<4>;
1351
1
    p.cu[BLOCK_32x32].ssimDist = ssimDist_c<5>;
1352
1
    p.cu[BLOCK_64x64].ssimDist = ssimDist_c<6>;
1353
1354
1
    p.cu[BLOCK_8x8].normFact = normFact_c;
1355
1
    p.cu[BLOCK_16x16].normFact = normFact_c;
1356
1
    p.cu[BLOCK_32x32].normFact = normFact_c;
1357
1
    p.cu[BLOCK_64x64].normFact = normFact_c;
1358
1
}
1359
}