Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/common/pixel.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Mandar Gurav <mandar@multicorewareinc.com>
6
 *          Mahesh Pittala <mahesh@multicorewareinc.com>
7
 *          Min Chen <min.chen@multicorewareinc.com>
8
 *          Hongbin Liu<liuhongbin1@huawei.com>
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23
 *
24
 * This program is also available under a commercial proprietary license.
25
 * For more information, contact us at license @ x265.com.
26
 *****************************************************************************/
27
28
#include "common.h"
29
#include "slicetype.h"      // LOWRES_COST_MASK
30
#include "primitives.h"
31
#include "x265.h"
32
33
#include <cstdlib> // abs()
34
35
using namespace X265_NS;
36
37
namespace {
38
// place functions in anonymous namespace (file static)
39
40
template<int lx, int ly>
41
int sad(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
42
0
{
43
0
    int sum = 0;
44
45
0
    for (int y = 0; y < ly; y++)
46
0
    {
47
0
        for (int x = 0; x < lx; x++)
48
0
            sum += abs(pix1[x] - pix2[x]);
49
50
0
        pix1 += stride_pix1;
51
0
        pix2 += stride_pix2;
52
0
    }
53
54
0
    return sum;
55
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<12, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<24, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<48, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 64>(unsigned char const*, long, unsigned char const*, long)
56
57
// Sum of absolute differences between two lx-wide, ly-tall blocks of
// int16_t samples. Each block is walked row by row using its own stride.
template<int lx, int ly>
int sad(const int16_t* pix1, intptr_t stride_pix1, const int16_t* pix2, intptr_t stride_pix2)
{
    int total = 0;

    for (int rowsLeft = ly; rowsLeft > 0; rowsLeft--)
    {
        int col = 0;
        while (col < lx)
        {
            total += abs(pix1[col] - pix2[col]);
            col++;
        }

        // step both blocks down one row
        pix1 += stride_pix1;
        pix2 += stride_pix2;
    }

    return total;
}
73
74
template<int lx, int ly>
75
void sad_x3(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, intptr_t frefstride, int32_t* res)
76
0
{
77
0
    res[0] = 0;
78
0
    res[1] = 0;
79
0
    res[2] = 0;
80
0
    for (int y = 0; y < ly; y++)
81
0
    {
82
0
        for (int x = 0; x < lx; x++)
83
0
        {
84
0
            res[0] += abs(pix1[x] - pix2[x]);
85
0
            res[1] += abs(pix1[x] - pix3[x]);
86
0
            res[2] += abs(pix1[x] - pix4[x]);
87
0
        }
88
89
0
        pix1 += FENC_STRIDE;
90
0
        pix2 += frefstride;
91
0
        pix3 += frefstride;
92
0
        pix4 += frefstride;
93
0
    }
94
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
95
96
template<int lx, int ly>
97
void sad_x4(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, const pixel* pix5, intptr_t frefstride, int32_t* res)
98
0
{
99
0
    res[0] = 0;
100
0
    res[1] = 0;
101
0
    res[2] = 0;
102
0
    res[3] = 0;
103
0
    for (int y = 0; y < ly; y++)
104
0
    {
105
0
        for (int x = 0; x < lx; x++)
106
0
        {
107
0
            res[0] += abs(pix1[x] - pix2[x]);
108
0
            res[1] += abs(pix1[x] - pix3[x]);
109
0
            res[2] += abs(pix1[x] - pix4[x]);
110
0
            res[3] += abs(pix1[x] - pix5[x]);
111
0
        }
112
113
0
        pix1 += FENC_STRIDE;
114
0
        pix2 += frefstride;
115
0
        pix3 += frefstride;
116
0
        pix4 += frefstride;
117
0
        pix5 += frefstride;
118
0
    }
119
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*)
120
121
// Scans `width` consecutive positions of `sums`. For each position the cost is
// the sum of four absolute differences between encDC[0..3] and the entries at
// offsets 0, lx/2, delta and delta + lx/2 from that position, plus
// costMvX[position]. Positions whose cost is below `thresh` are appended to
// `mvs`; the number of positions written is returned.
template<int lx, int ly>
int ads_x4(int encDC[4], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    const int halfWidth = lx >> 1;
    int kept = 0;
    int16_t pos = 0;

    while (pos < width)
    {
        const int cost = abs(encDC[0] - long(sums[0]))
            + abs(encDC[1] - long(sums[halfWidth]))
            + abs(encDC[2] - long(sums[delta]))
            + abs(encDC[3] - long(sums[delta + halfWidth]))
            + costMvX[pos];

        if (cost < thresh)
        {
            mvs[kept] = pos;
            kept++;
        }

        pos++;
        sums++;
    }

    return kept;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<16, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 24>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<24, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<32, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<8, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 48>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<48, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<64, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x4<16, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
137
138
// Scans `width` consecutive positions of `sums`. For each position the cost is
// |encDC[0] - sums[0]| + |encDC[1] - sums[delta]| (relative to that position)
// plus costMvX[position]. Positions whose cost is below `thresh` are appended
// to `mvs`; the number of positions written is returned.
template<int lx, int ly>
int ads_x2(int encDC[2], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int kept = 0;
    int16_t pos = 0;

    while (pos < width)
    {
        const int cost = abs(encDC[0] - long(sums[0]))
            + abs(encDC[1] - long(sums[delta]))
            + costMvX[pos];

        if (cost < thresh)
        {
            mvs[kept] = pos;
            kept++;
        }

        pos++;
        sums++;
    }

    return kept;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<8, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<4, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<16, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<8, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<32, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<16, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<64, 32>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x2<32, 64>(int*, unsigned int*, int, unsigned short*, short*, int, int)
152
153
// Scans `width` consecutive entries of `sums`. For each position the cost is
// |encDC[0] - sums[position]| + costMvX[position]. Positions whose cost is
// below `thresh` are appended to `mvs`; the number of positions written is
// returned. The third (unnamed) parameter is ignored.
template<int lx, int ly>
int ads_x1(int encDC[1], uint32_t *sums, int, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int kept = 0;
    int16_t pos = 0;

    while (pos < width)
    {
        const int cost = abs(encDC[0] - long(sums[0]))
            + costMvX[pos];

        if (cost < thresh)
        {
            mvs[kept] = pos;
            kept++;
        }

        pos++;
        sums++;
    }

    return kept;
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<4, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<8, 8>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<16, 12>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<12, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<16, 4>(int*, unsigned int*, int, unsigned short*, short*, int, int)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::ads_x1<4, 16>(int*, unsigned int*, int, unsigned short*, short*, int, int)
166
167
template<int lx, int ly, class T1, class T2>
168
sse_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
169
0
{
170
0
    sse_t sum = 0;
171
0
    int tmp;
172
173
0
    for (int y = 0; y < ly; y++)
174
0
    {
175
0
        for (int x = 0; x < lx; x++)
176
0
        {
177
0
            tmp = pix1[x] - pix2[x];
178
0
            sum += (tmp * tmp);
179
0
        }
180
181
0
        pix1 += stride_pix1;
182
0
        pix2 += stride_pix2;
183
0
    }
184
185
0
    return sum;
186
0
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, short, short>(short const*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 2, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long)
187
188
0
// Width in bits of one sum_t (8 bits per byte of sizeof(sum_t)). The SATD
// helpers below use it as the shift between the two values packed into a
// single sum2_t.
#define BITS_PER_SUM (8 * sizeof(sum_t))
189
190
0
// 4-point Hadamard butterfly on sum2_t values. Assigns
//   d0 = s0 + s1 + s2 + s3,   d1 = s0 - s1 + s2 - s3,
//   d2 = s0 + s1 - s2 - s3,   d3 = s0 - s1 - s2 + s3.
// The arguments are substituted textually and each sN appears twice in the
// expansion, so pass simple, side-effect-free expressions.
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \
191
0
        sum2_t t0 = s0 + s1; \
192
0
        sum2_t t1 = s0 - s1; \
193
0
        sum2_t t2 = s2 + s3; \
194
0
        sum2_t t3 = s2 - s3; \
195
0
        d0 = t0 + t2; \
196
0
        d2 = t0 - t2; \
197
0
        d1 = t1 + t3; \
198
0
        d3 = t1 - t3; \
199
0
}
200
201
// in: a pseudo-simd number of the form x+(y<<BITS_PER_SUM), i.e. two values
//     packed into one sum2_t
202
// return: abs(x)+(abs(y)<<BITS_PER_SUM)
203
inline sum2_t abs2(sum2_t a)
204
0
{
205
0
    // Move the top bit of each packed half down to bit 0 / bit BITS_PER_SUM,
    // then multiply by (sum_t)-1 to build the per-half mask s.
    // NOTE(review): presumably sum_t is unsigned and half the width of sum2_t,
    // so s is all-ones in each half whose top bit was set - confirm against
    // the typedefs.
    sum2_t s = ((a >> (BITS_PER_SUM - 1)) & (((sum2_t)1 << BITS_PER_SUM) + 1)) * ((sum_t)-1);
206
207
0
    // (a + s) ^ s leaves halves with a zero mask unchanged and negates
    // (two's complement) the halves whose mask is all-ones.
    return (a + s) ^ s;
208
0
}
209
210
// SATD of one 4x4 block: the difference pix1 - pix2 is put through a 4x4
// Hadamard transform and half the sum of the absolute coefficients is
// returned. Two values are carried per sum2_t (low half and the half shifted
// up by BITS_PER_SUM) so each operation works on a pair at once.
static int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
211
0
{
212
0
    sum2_t tmp[4][2];
213
0
    sum2_t a0, a1, a2, a3, b0, b1;
214
0
    sum2_t sum = 0;
215
216
0
    // Horizontal pass, one row per iteration.
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
217
0
    {
218
0
        a0 = pix1[0] - pix2[0];
219
0
        a1 = pix1[1] - pix2[1];
220
0
        // low half: a0 + a1, high half: a0 - a1
        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
221
0
        a2 = pix1[2] - pix2[2];
222
0
        a3 = pix1[3] - pix2[3];
223
0
        // low half: a2 + a3, high half: a2 - a3
        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
224
0
        // four horizontal coefficients of this row, packed two per entry
        tmp[i][0] = b0 + b1;
225
0
        tmp[i][1] = b0 - b1;
226
0
    }
227
228
0
    // Vertical pass over each packed column, then accumulate magnitudes.
    for (int i = 0; i < 2; i++)
229
0
    {
230
0
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
231
0
        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
232
0
        // fold the two packed halves into one scalar total
        sum += ((sum_t)a0) + (a0 >> BITS_PER_SUM);
233
0
    }
234
235
0
    return (int)(sum >> 1);
236
0
}
237
238
// x264's SWAR version of satd 8x4, performs two 4x4 SATDs at once.
// Column c of the difference goes in the low half of a sum2_t and column
// c + 4 in the half shifted up by BITS_PER_SUM, so the left and right 4x4
// halves of the 8x4 block are transformed together. Returns half the sum of
// the absolute Hadamard coefficients of both halves.
239
static int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
240
0
{
241
0
    sum2_t tmp[4][4];
242
0
    sum2_t a0, a1, a2, a3;
243
0
    sum2_t sum = 0;
244
245
0
    // Horizontal pass, one row per iteration.
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
246
0
    {
247
0
        // pack the difference at column c (low) with the one at column c + 4 (high)
        a0 = (pix1[0] - pix2[0]) + ((sum2_t)(pix1[4] - pix2[4]) << BITS_PER_SUM);
248
0
        a1 = (pix1[1] - pix2[1]) + ((sum2_t)(pix1[5] - pix2[5]) << BITS_PER_SUM);
249
0
        a2 = (pix1[2] - pix2[2]) + ((sum2_t)(pix1[6] - pix2[6]) << BITS_PER_SUM);
250
0
        a3 = (pix1[3] - pix2[3]) + ((sum2_t)(pix1[7] - pix2[7]) << BITS_PER_SUM);
251
0
        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
252
0
    }
253
254
0
    // Vertical pass over each column, accumulating packed magnitudes.
    for (int i = 0; i < 4; i++)
255
0
    {
256
0
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
257
0
        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
258
0
    }
259
260
0
    // add the low and high halves of the packed total, then halve
    return (((sum_t)sum) + (sum >> BITS_PER_SUM)) >> 1;
261
0
}
262
263
template<int w, int h>
264
// calculate satd in blocks of 4x4
265
int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
266
0
{
267
0
    int satd = 0;
268
269
0
    for (int row = 0; row < h; row += 4)
270
0
        for (int col = 0; col < w; col += 4)
271
0
            satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1,
272
0
                             pix2 + row * stride_pix2 + col, stride_pix2);
273
274
0
    return satd;
275
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 32>(unsigned char const*, long, unsigned char const*, long)
276
277
template<int w, int h>
278
// calculate satd in blocks of 8x4
279
int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
280
0
{
281
0
    int satd = 0;
282
283
0
    for (int row = 0; row < h; row += 4)
284
0
        for (int col = 0; col < w; col += 8)
285
0
            satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
286
0
                             pix2 + row * stride_pix2 + col, stride_pix2);
287
288
0
    return satd;
289
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 12>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 4>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<48, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 24>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 48>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 64>(unsigned char const*, long, unsigned char const*, long)
290
291
inline int _sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
292
0
{
293
0
    sum2_t tmp[8][4];
294
0
    sum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
295
0
    sum2_t sum = 0;
296
297
0
    for (int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2)
298
0
    {
299
0
        a0 = pix1[0] - pix2[0];
300
0
        a1 = pix1[1] - pix2[1];
301
0
        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
302
0
        a2 = pix1[2] - pix2[2];
303
0
        a3 = pix1[3] - pix2[3];
304
0
        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
305
0
        a4 = pix1[4] - pix2[4];
306
0
        a5 = pix1[5] - pix2[5];
307
0
        b2 = (a4 + a5) + ((a4 - a5) << BITS_PER_SUM);
308
0
        a6 = pix1[6] - pix2[6];
309
0
        a7 = pix1[7] - pix2[7];
310
0
        b3 = (a6 + a7) + ((a6 - a7) << BITS_PER_SUM);
311
0
        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0, b1, b2, b3);
312
0
    }
313
314
0
    for (int i = 0; i < 4; i++)
315
0
    {
316
0
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
317
0
        HADAMARD4(a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i]);
318
0
        b0  = abs2(a0 + a4) + abs2(a0 - a4);
319
0
        b0 += abs2(a1 + a5) + abs2(a1 - a5);
320
0
        b0 += abs2(a2 + a6) + abs2(a2 - a6);
321
0
        b0 += abs2(a3 + a7) + abs2(a3 - a7);
322
0
        sum += (sum_t)b0 + (b0 >> BITS_PER_SUM);
323
0
    }
324
325
0
    return (int)sum;
326
0
}
327
328
inline int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
329
0
{
330
0
    return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
331
0
}
332
333
static int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
334
0
{
335
0
    int sum = _sa8d_8x8(pix1, i_pix1, pix2, i_pix2)
336
0
        + _sa8d_8x8(pix1 + 8, i_pix1, pix2 + 8, i_pix2)
337
0
        + _sa8d_8x8(pix1 + 8 * i_pix1, i_pix1, pix2 + 8 * i_pix2, i_pix2)
338
0
        + _sa8d_8x8(pix1 + 8 + 8 * i_pix1, i_pix1, pix2 + 8 + 8 * i_pix2, i_pix2);
339
340
    // This matches x264 sa8d_16x16, but is slightly different from HM's behavior because
341
    // this version only rounds once at the end
342
0
    return (sum + 2) >> 2;
343
0
}
344
345
template<int w, int h>
346
// Calculate sa8d in blocks of 8x8
347
int sa8d8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
348
0
{
349
0
    int cost = 0;
350
351
0
    for (int y = 0; y < h; y += 8)
352
0
        for (int x = 0; x < w; x += 8)
353
0
            cost += sa8d_8x8(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2);
354
355
0
    return cost;
356
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 8>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 16>(unsigned char const*, long, unsigned char const*, long)
357
358
template<int w, int h>
359
// Calculate sa8d in blocks of 16x16
360
int sa8d16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
361
0
{
362
0
    int cost = 0;
363
364
0
    for (int y = 0; y < h; y += 16)
365
0
        for (int x = 0; x < w; x += 16)
366
0
            cost += sa8d_16x16(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2);
367
368
0
    return cost;
369
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<32, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<64, 64>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 16>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 32>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<32, 64>(unsigned char const*, long, unsigned char const*, long)
370
371
template<int size>
372
sse_t pixel_ssd_s_c(const int16_t* a, intptr_t dstride)
373
0
{
374
0
    sse_t sum = 0;
375
0
    for (int y = 0; y < size; y++)
376
0
    {
377
0
        for (int x = 0; x < size; x++)
378
0
            sum += a[x] * a[x];
379
380
0
        a += dstride;
381
0
    }
382
0
    return sum;
383
0
}
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<4>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<8>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<16>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<32>(short const*, long)
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<64>(short const*, long)
384
385
// Writes `val` into every element of a size x size block of 16-bit samples.
// `dstride` is the distance, in elements, between successive rows of `dst`;
// elements outside the block are left untouched.
template<int size>
void blockfill_s_c(int16_t* dst, intptr_t dstride, int16_t val)
{
    for (int row = 0; row < size; row++)
    {
        int16_t* line = dst + row * dstride;

        for (int col = 0; col < size; col++)
            line[col] = val;
    }
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<4>(short*, long, short)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<8>(short*, long, short)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<16>(short*, long, short)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<32>(short*, long, short)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<64>(short*, long, short)
392
393
template<int size>
394
void cpy2Dto1D_shl(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift)
395
0
{
396
0
    X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n");
397
0
    X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n");
398
0
    X265_CHECK(shift >= 0, "invalid shift\n");
399
400
0
    for (int i = 0; i < size; i++)
401
0
    {
402
0
        for (int j = 0; j < size; j++)
403
0
            dst[j] = src[j] << shift;
404
405
0
        src += srcStride;
406
0
        dst += size;
407
0
    }
408
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<64>(short*, short const*, long, int)
409
410
template<int size>
411
void cpy2Dto1D_shr(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift)
412
0
{
413
0
    X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n");
414
0
    X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n");
415
0
    X265_CHECK(shift > 0, "invalid shift\n");
416
417
0
    int16_t round = 1 << (shift - 1);
418
0
    for (int i = 0; i < size; i++)
419
0
    {
420
0
        for (int j = 0; j < size; j++)
421
0
            dst[j] = (src[j] + round) >> shift;
422
423
0
        src += srcStride;
424
0
        dst += size;
425
0
    }
426
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<64>(short*, short const*, long, int)
427
428
template<int size>
429
void cpy1Dto2D_shl(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift)
430
0
{
431
0
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
432
0
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
433
0
    X265_CHECK(shift >= 0, "invalid shift\n");
434
435
0
    for (int i = 0; i < size; i++)
436
0
    {
437
0
        for (int j = 0; j < size; j++)
438
0
            dst[j] = src[j] << shift;
439
440
0
        src += size;
441
0
        dst += dstStride;
442
0
    }
443
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<64>(short*, short const*, long, int)
444
445
template<int size>
446
void cpy1Dto2D_shr(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift)
447
0
{
448
0
    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
449
0
    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
450
0
    X265_CHECK(shift > 0, "invalid shift\n");
451
452
0
    int16_t round = 1 << (shift - 1);
453
0
    for (int i = 0; i < size; i++)
454
0
    {
455
0
        for (int j = 0; j < size; j++)
456
0
            dst[j] = (src[j] + round) >> shift;
457
458
0
        src += size;
459
0
        dst += dstStride;
460
0
    }
461
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<4>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<8>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<16>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<32>(short*, short const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<64>(short*, short const*, long, int)
462
463
template<int blockSize>
464
void getResidual(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride)
465
0
{
466
0
    for (int y = 0; y < blockSize; y++)
467
0
    {
468
0
        for (int x = 0; x < blockSize; x++)
469
0
            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
470
471
0
        fenc += stride;
472
0
        residual += stride;
473
0
        pred += stride;
474
0
    }
475
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<4>(unsigned char const*, unsigned char const*, short*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<8>(unsigned char const*, unsigned char const*, short*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<16>(unsigned char const*, unsigned char const*, short*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<32>(unsigned char const*, unsigned char const*, short*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<64>(unsigned char const*, unsigned char const*, short*, long)
476
477
template<int blockSize>
478
void transpose(pixel* dst, const pixel* src, intptr_t stride)
479
0
{
480
0
    for (int k = 0; k < blockSize; k++)
481
0
        for (int l = 0; l < blockSize; l++)
482
0
            dst[k * blockSize + l] = src[l * stride + k];
483
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<4>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<8>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<16>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<32>(unsigned char*, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<64>(unsigned char*, unsigned char const*, long)
484
485
static void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
486
0
{
487
0
    int x, y;
488
489
#if CHECKED_BUILD || _DEBUG
490
    const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
491
    X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
492
    X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
493
    X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n");
494
#endif
495
496
0
    for (y = 0; y <= height - 1; y++)
497
0
    {
498
0
        for (x = 0; x <= width - 1; )
499
0
        {
500
            // note: width can be odd
501
0
            dst[x] = x265_clip(((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
502
0
            x++;
503
0
        }
504
505
0
        src += srcStride;
506
0
        dst += dstStride;
507
0
    }
508
0
}
509
510
static void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
511
0
{
512
0
    int x, y;
513
514
0
    const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
515
516
0
    X265_CHECK(!(width & 15), "weightp alignment error\n");
517
0
    X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
518
0
    X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
519
0
    X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n");
520
0
    X265_CHECK(!(round & ((1 << correction) - 1)), "round must be include factor correction, please update ASM ABI\n");
521
522
0
    for (y = 0; y <= height - 1; y++)
523
0
    {
524
0
        for (x = 0; x <= width - 1; )
525
0
        {
526
            // simulating pixel to short conversion
527
0
            int16_t val = src[x] << correction;
528
0
            dst[x] = x265_clip(((w0 * (val) + round) >> shift) + offset);
529
0
            x++;
530
0
        }
531
532
0
        src += stride;
533
0
        dst += stride;
534
0
    }
535
0
}
536
537
template<int lx, int ly>
538
void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
539
0
{
540
0
    for (int y = 0; y < ly; y++)
541
0
    {
542
0
        for (int x = 0; x < lx; x++)
543
0
            dst[x] = (src0[x] + src1[x] + 1) >> 1;
544
545
0
        src0 += sstride0;
546
0
        src1 += sstride1;
547
0
        dst += dstride;
548
0
    }
549
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 12>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<12, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 24>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<24, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 48>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<48, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int)
550
551
// Halves two consecutive 128-sample runs (src[0..127] and src[128..255]) into
// two 64-sample runs (dst[0..63] and dst[64..127]) by averaging each adjacent
// pair of samples with rounding.
static void scale1D_128to64(pixel *dst, const pixel *src)
{
    const pixel* firstRun = src;
    const pixel* secondRun = src + 128;

    for (int out = 0; out < 64; out++)
    {
        const int in = out * 2;

        // read both pairs before writing either result
        const int firstSum = firstRun[in] + firstRun[in + 1];
        const int secondSum = secondRun[in] + secondRun[in + 1];

        dst[out] = (pixel)((firstSum + 1) >> 1);
        dst[64 + out] = (pixel)((secondSum + 1) >> 1);
    }
}
576
577
// Downscales a 64x64 block (rows `stride` elements apart) to a densely
// packed 32x32 block by averaging each 2x2 group of pixels with rounding.
static void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
{
    for (uint32_t outY = 0; outY < 32; outY++)
    {
        const uint32_t y = outY * 2;

        for (uint32_t outX = 0; outX < 32; outX++)
        {
            const uint32_t x = outX * 2;

            const pixel topLeft = src[(y + 0) * stride + (x + 0)];
            const pixel topRight = src[(y + 0) * stride + (x + 1)];
            const pixel bottomLeft = src[(y + 1) * stride + (x + 0)];
            const pixel bottomRight = src[(y + 1) * stride + (x + 1)];
            const int total = topLeft + topRight + bottomLeft + bottomRight;

            dst[outY * 32 + outX] = (pixel)((total + 2) >> 2);
        }
    }
}
595
596
static
597
void frame_init_lowres_core(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc,
598
                            intptr_t src_stride, intptr_t dst_stride, int width, int height)
599
0
{
600
0
    for (int y = 0; y < height; y++)
601
0
    {
602
0
        const pixel* src1 = src0 + src_stride;
603
0
        const pixel* src2 = src1 + src_stride;
604
0
        for (int x = 0; x < width; x++)
605
0
        {
606
            // slower than naive bilinear, but matches asm
607
0
#define FILTER(a, b, c, d) ((((a + b + 1) >> 1) + ((c + d + 1) >> 1) + 1) >> 1)
608
0
            dst0[x] = FILTER(src0[2 * x], src1[2 * x], src0[2 * x + 1], src1[2 * x + 1]);
609
0
            dsth[x] = FILTER(src0[2 * x + 1], src1[2 * x + 1], src0[2 * x + 2], src1[2 * x + 2]);
610
0
            dstv[x] = FILTER(src1[2 * x], src2[2 * x], src1[2 * x + 1], src2[2 * x + 1]);
611
0
            dstc[x] = FILTER(src1[2 * x + 1], src2[2 * x + 1], src1[2 * x + 2], src2[2 * x + 2]);
612
0
#undef FILTER
613
0
        }
614
0
        src0 += src_stride * 2;
615
0
        dst0 += dst_stride;
616
0
        dsth += dst_stride;
617
0
        dstv += dst_stride;
618
0
        dstc += dst_stride;
619
0
    }
620
0
}
621
622
static
623
void frame_subsample_luma(const pixel* src0, pixel* dst0, intptr_t src_stride, intptr_t dst_stride, int width, int height)
624
0
{
625
0
    for (int y = 0; y < height; y++, src0 += 2 * src_stride, dst0 += dst_stride)
626
0
    {
627
0
        const pixel *inRow = src0;
628
0
        const pixel *inRowBelow = src0 + src_stride;
629
0
        pixel *target = dst0;
630
0
        for (int x = 0; x < width; x++)
631
0
        {
632
0
            target[x] = (((inRow[0] + inRowBelow[0] + 1) >> 1) + ((inRow[1] + inRowBelow[1] + 1) >> 1) + 1) >> 1;
633
0
            inRow += 2;
634
0
            inRowBelow += 2;
635
0
        }
636
0
    }
637
0
}
638
639
/* structural similarity metric */
640
// Gathers SSIM statistics for two horizontally adjacent 4x4 blocks.
// For block z (0 or 1) the outputs are:
//   sums[z][0] - sum of pix1 samples
//   sums[z][1] - sum of pix2 samples
//   sums[z][2] - sum of squares of both pix1 and pix2 samples
//   sums[z][3] - sum of pix1 * pix2 products
static void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4])
{
    for (int z = 0; z < 2; z++, pix1 += 4, pix2 += 4)
    {
        uint32_t sumA = 0;
        uint32_t sumB = 0;
        uint32_t sumSquares = 0;
        uint32_t sumCross = 0;

        for (int row = 0; row < 4; row++)
        {
            for (int col = 0; col < 4; col++)
            {
                const int a = pix1[col + row * stride1];
                const int b = pix2[col + row * stride2];

                sumA += a;
                sumB += b;
                sumSquares += a * a;
                sumSquares += b * b;
                sumCross += a * b;
            }
        }

        int* out = sums[z];
        out[0] = sumA;
        out[1] = sumB;
        out[2] = sumSquares;
        out[3] = sumCross;
    }
}
667
668
static float ssim_end_1(int s1, int s2, int ss, int s12)
669
0
{
670
/* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
671
 * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
672
 * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
673
674
#if HIGH_BIT_DEPTH
675
    X265_CHECK((X265_DEPTH == 10) || (X265_DEPTH == 12), "ssim invalid depth\n");
676
#define type float
677
    static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64);
678
    static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63);
679
#else
680
0
    X265_CHECK(X265_DEPTH == 8, "ssim invalid depth\n");
681
0
#define type int
682
0
    static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5);
683
0
    static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5);
684
0
#endif
685
0
    type fs1 = (type)s1;
686
0
    type fs2 = (type)s2;
687
0
    type fss = (type)ss;
688
0
    type fs12 = (type)s12;
689
0
    type vars = (type)(fss * 64 - fs1 * fs1 - fs2 * fs2);
690
0
    type covar = (type)(fs12 * 64 - fs1 * fs2);
691
0
    return (float)(2 * fs1 * fs2 + ssim_c1) * (float)(2 * covar + ssim_c2)
692
0
           / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars + ssim_c2));
693
0
#undef type
694
0
#undef PIXEL_MAX
695
0
}
696
697
static float ssim_end_4(int sum0[5][4], int sum1[5][4], int width)
698
0
{
699
0
    float ssim = 0.0;
700
701
0
    for (int i = 0; i < width; i++)
702
0
    {
703
0
        ssim += ssim_end_1(sum0[i][0] + sum0[i + 1][0] + sum1[i][0] + sum1[i + 1][0],
704
0
                           sum0[i][1] + sum0[i + 1][1] + sum1[i][1] + sum1[i + 1][1],
705
0
                           sum0[i][2] + sum0[i + 1][2] + sum1[i][2] + sum1[i + 1][2],
706
0
                           sum0[i][3] + sum0[i + 1][3] + sum1[i][3] + sum1[i + 1][3]);
707
0
    }
708
709
0
    return ssim;
710
0
}
711
712
template<int size>
713
uint64_t pixel_var(const pixel* pix, intptr_t i_stride)
714
0
{
715
0
    uint32_t sum = 0, sqr = 0;
716
717
0
    for (int y = 0; y < size; y++)
718
0
    {
719
0
        for (int x = 0; x < size; x++)
720
0
        {
721
0
            sum += pix[x];
722
0
            sqr += pix[x] * pix[x];
723
0
        }
724
725
0
        pix += i_stride;
726
0
    }
727
728
0
    return sum + ((uint64_t)sqr << 32);
729
0
}
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<4>(unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<8>(unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<16>(unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<32>(unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<64>(unsigned char const*, long)
730
731
#if defined(_MSC_VER)
732
#pragma warning(disable: 4127) // conditional expression is constant
733
#endif
734
735
template<int size>
736
int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
737
0
{
738
0
    static pixel zeroBuf[8] /* = { 0 } */;
739
740
0
    if (size)
741
0
    {
742
0
        int dim = 1 << (size + 2);
743
0
        uint32_t totEnergy = 0;
744
0
        for (int i = 0; i < dim; i += 8)
745
0
        {
746
0
            for (int j = 0; j < dim; j+= 8)
747
0
            {
748
                /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
749
0
                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
750
0
                                   (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
751
0
                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
752
0
                                   (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
753
754
0
                totEnergy += abs(sourceEnergy - reconEnergy);
755
0
            }
756
0
        }
757
0
        return totEnergy;
758
0
    }
759
0
    else
760
0
    {
761
        /* 4x4 is too small for sa8d */
762
0
        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
763
0
        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
764
0
        return abs(sourceEnergy - reconEnergy);
765
0
    }
766
0
}
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<0>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<1>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<2>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<3>(unsigned char const*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<4>(unsigned char const*, long, unsigned char const*, long)
767
768
template<int bx, int by>
769
void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
770
0
{
771
0
    for (int y = 0; y < by; y++)
772
0
    {
773
0
        for (int x = 0; x < bx; x++)
774
0
            a[x] = b[x];
775
776
0
        a += stridea;
777
0
        b += strideb;
778
0
    }
779
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 12>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 24>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 48>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<48, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 4>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 6>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 2>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 8>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 12>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 16>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 24>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 32>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 48>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 64>(unsigned char*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 64>(unsigned char*, long, unsigned char const*, long)
780
781
/* Copy a bx-wide, by-high block of 16-bit samples from b into a.  Strides
 * are counted in int16_t elements. */
template<int bx, int by>
void blockcopy_ss_c(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
{
    for (int row = 0; row < by; row++)
    {
        int16_t* out = a + row * stridea;
        const int16_t* in = b + row * strideb;

        for (int col = 0; col < bx; col++)
            out[col] = in[col];
    }
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<4, 4>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<8, 8>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<16, 16>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<32, 32>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<64, 64>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<2, 2>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<2, 4>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<4, 8>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<8, 16>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<16, 32>(short*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ss_c<32, 64>(short*, long, short const*, long)
793
794
template<int bx, int by>
795
void blockcopy_sp_c(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
796
0
{
797
0
    for (int y = 0; y < by; y++)
798
0
    {
799
0
        for (int x = 0; x < bx; x++)
800
0
        {
801
0
            X265_CHECK((b[x] >= 0) && (b[x] <= ((1 << X265_DEPTH) - 1)), "blockcopy pixel size fail\n");
802
0
            a[x] = (pixel)b[x];
803
0
        }
804
805
0
        a += stridea;
806
0
        b += strideb;
807
0
    }
808
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 4>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 8>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 16>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 32>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<64, 64>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 2>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 4>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 8>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 16>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 32>(unsigned char*, long, short const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 64>(unsigned char*, long, short const*, long)
809
810
template<int bx, int by>
811
void blockcopy_ps_c(int16_t* a, intptr_t stridea, const pixel* b, intptr_t strideb)
812
0
{
813
0
    for (int y = 0; y < by; y++)
814
0
    {
815
0
        for (int x = 0; x < bx; x++)
816
0
            a[x] = (int16_t)b[x];
817
818
0
        a += stridea;
819
0
        b += strideb;
820
0
    }
821
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<4, 4>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<8, 8>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<16, 16>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<32, 32>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<64, 64>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<2, 2>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<2, 4>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<4, 8>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<8, 16>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<16, 32>(short*, long, unsigned char const*, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_ps_c<32, 64>(short*, long, unsigned char const*, long)
822
823
template<int bx, int by>
824
void pixel_sub_ps_c(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1)
825
0
{
826
0
    for (int y = 0; y < by; y++)
827
0
    {
828
0
        for (int x = 0; x < bx; x++)
829
0
            a[x] = (int16_t)(b0[x] - b1[x]);
830
831
0
        b0 += sstride0;
832
0
        b1 += sstride1;
833
0
        a += dstride;
834
0
    }
835
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 4>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 8>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 16>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 32>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<64, 64>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 2>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 4>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 8>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 16>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 32>(short*, long, unsigned char const*, unsigned char const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 64>(short*, long, unsigned char const*, unsigned char const*, long, long)
836
837
template<int bx, int by>
838
void pixel_add_ps_c(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1)
839
0
{
840
0
    for (int y = 0; y < by; y++)
841
0
    {
842
0
        for (int x = 0; x < bx; x++)
843
0
            a[x] = x265_clip(b0[x] + b1[x]);
844
845
0
        b0 += sstride0;
846
0
        b1 += sstride1;
847
0
        a += dstride;
848
0
    }
849
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 4>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 8>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 16>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 32>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<64, 64>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 2>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 4>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 8>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 16>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 32>(unsigned char*, long, unsigned char const*, short const*, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 64>(unsigned char*, long, unsigned char const*, short const*, long, long)
850
851
template<int bx, int by>
852
void addAvg(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
853
0
{
854
0
    int shiftNum, offset;
855
856
0
    shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
857
0
    offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
858
859
0
    for (int y = 0; y < by; y++)
860
0
    {
861
0
        for (int x = 0; x < bx; x += 2)
862
0
        {
863
0
            dst[x + 0] = x265_clip((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
864
0
            dst[x + 1] = x265_clip((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
865
0
        }
866
867
0
        src0 += src0Stride;
868
0
        src1 += src1Stride;
869
0
        dst  += dstStride;
870
0
    }
871
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 12>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 24>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 48>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<48, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 4>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 6>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 2>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 8>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 12>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 16>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 24>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 32>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 48>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 64>(short const*, short const*, unsigned char*, long, long, long)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 64>(short const*, short const*, unsigned char*, long, long, long)
872
873
static void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
874
0
{
875
0
    for (int r = 0; r < height; r++)
876
0
    {
877
0
        for (int c = 0; c < width; c++)
878
0
            dst[c] = ((pixel)src[c]) << shift;
879
880
0
        dst += dstStride;
881
0
        src += srcStride;
882
0
    }
883
0
}
884
885
static void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
886
0
{
887
0
    for (int r = 0; r < height; r++)
888
0
    {
889
0
        for (int c = 0; c < width; c++)
890
0
            dst[c] = (pixel)((src[c] >> shift) & mask);
891
892
0
        dst += dstStride;
893
0
        src += srcStride;
894
0
    }
895
0
}
896
897
static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
898
0
{
899
0
    for (int r = 0; r < height; r++)
900
0
    {
901
0
        for (int c = 0; c < width; c++)
902
0
            dst[c] = (pixel)((src[c] >> shift));
903
904
0
        dst += dstStride;
905
0
        src += srcStride;
906
0
    }
907
0
}
908
909
static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
910
0
{
911
0
    for (int r = 0; r < height; r++)
912
0
    {
913
0
        for (int c = 0; c < width; c++)
914
0
            dst[c] = (pixel)((src[c] << shift) & mask);
915
916
0
        dst += dstStride;
917
0
        src += srcStride;
918
0
    }
919
0
}
920
921
/* Estimate the total amount of influence on future quality that could be had if we
922
 * were to improve the reference samples used to inter predict any given CU. */
923
static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
924
                                    const int32_t* invQscales, const double* fpsFactor, int len)
925
0
{
926
0
    double fps = *fpsFactor / 256;  // range[0.01, 1.00]
927
0
    for (int i = 0; i < len; i++)
928
0
    {
929
0
        int intraCost = intraCosts[i];
930
0
        int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
931
0
        double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
932
0
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
933
0
        double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
934
935
#if 0
936
        // algorithm that output match to asm
937
        float intraRcp = (float)1.0f / intraCost;   // VC can't mapping this into RCPPS
938
        float intraRcpError1 = (float)intraCost * (float)intraRcp;
939
        intraRcpError1 *= (float)intraRcp;
940
        float intraRcpError2 = intraRcp + intraRcp;
941
        float propagateDenom = intraRcpError2 - intraRcpError1;
942
        dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
943
#else
944
0
        double propagateDenom = (double)intraCost;             // Q32
945
0
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
946
0
#endif
947
0
        }
948
    //}
949
0
}
950
951
/* Conversion between double and Q8.8 fixed point for storage.
 * Pack: each of the 'count' doubles is scaled by 256, truncated to a signed
 * 16-bit value and stored with its bit pattern unchanged as uint16_t. */
static void cuTreeFix8Pack(uint16_t *dst, double *src, int count)
{
    int i = 0;
    while (i < count)
    {
        const int16_t fixed = (int16_t)(src[i] * 256.0);
        dst[i] = (uint16_t)fixed;
        i++;
    }
}
957
958
/* Unpack 'count' Q8.8 fixed-point values: each stored uint16_t is
 * reinterpreted as a signed 16-bit value and divided by 256 to yield a double. */
static void cuTreeFix8Unpack(double *dst, uint16_t *src, int count)
{
    for (int idx = 0; idx != count && idx < count; idx++)
    {
        const int16_t fixed = src[idx];
        dst[idx] = (double)fixed / 256.0;
    }
}
966
967
template<int log2TrSize>
968
static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
969
0
{
970
0
    *ssBlock = 0;
971
0
    int trSize = 1 << log2TrSize;
972
0
    for (int y = 0; y < trSize; y++)
973
0
    {
974
0
        for (int x = 0; x < trSize; x++)
975
0
        {
976
0
            int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // copy of residual coeff
977
0
            *ssBlock += temp * temp;
978
0
        }
979
0
    }
980
981
0
    *ac_k = 0;
982
0
    for (int block_yy = 0; block_yy < trSize; block_yy += 1)
983
0
    {
984
0
        for (int block_xx = 0; block_xx < trSize; block_xx += 1)
985
0
        {
986
0
            uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift;
987
0
            *ac_k += temp * temp;
988
0
        }
989
0
    }
990
0
}
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<2>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<3>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<4>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<5>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<6>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*)
991
992
/* Sum of squares of the samples of a blockSize x blockSize block (row pitch
 * equal to blockSize), each sample shifted right by 'shift' first.  The
 * total is stored in *z_k. */
static void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k)
{
    *z_k = 0;

    for (uint32_t row = 0; row < blockSize; row++)
    {
        const uint32_t rowBase = row * blockSize;

        for (uint32_t col = 0; col < blockSize; col++)
        {
            const uint32_t scaled = src[rowBase + col] >> shift;
            *z_k += scaled * scaled;
        }
    }
}
1004
1005
#if HIGH_BIT_DEPTH
1006
/* Clip every sample of a width x height plane into [minPix, maxPix] in place.
 * Returns the largest sample after clipping and stores the sum of all clipped
 * samples in *outsum. */
static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum,
                               const pixel minPix, const pixel maxPix)
{
    uint64_t total = 0;
    pixel peak = 0;
    pixel* rowPtr = src;

    for (int row = 0; row < height; row++)
    {
        for (int col = 0; col < width; col++)
        {
            /* Clip luma of source picture to max and min */
            const pixel clipped = x265_clip3((pixel)minPix, (pixel)maxPix, rowPtr[col]);
            rowPtr[col] = clipped;

            peak = X265_MAX(clipped, peak);
            total += clipped;
        }
        rowPtr += stride;
    }

    *outsum = total;
    return peak;
}
1026
1027
#endif
1028
}  // end anonymous namespace
1029
1030
namespace X265_NS {
1031
// x265 private namespace
1032
1033
/* Extend the edges of a picture so that it may safely be used for motion
1034
 * compensation. This function assumes the picture is stored in a buffer with
1035
 * sufficient padding for the X and Y margins */
1036
void extendPicBorder(pixel* pic, intptr_t stride, int width, int height, int marginX, int marginY)
1037
0
{
1038
    /* extend left and right margins */
1039
0
    primitives.extendRowBorder(pic, stride, width, height, marginX);
1040
1041
    /* copy top row to create above margin */
1042
0
    pixel* top = pic - marginX;
1043
0
    for (int y = 0; y < marginY; y++)
1044
0
        memcpy(top - (y + 1) * stride, top, stride * sizeof(pixel));
1045
1046
    /* copy bottom row to create below margin */
1047
0
    pixel* bot = pic - marginX + (height - 1) * stride;
1048
0
    for (int y = 0; y < marginY; y++)
1049
0
        memcpy(bot + (y + 1) * stride, bot, stride * sizeof(pixel));
1050
0
}
1051
1052
/* Initialize entries for pixel functions defined in this file */
1053
void setupPixelPrimitives_c(EncoderPrimitives &p)
1054
0
{
1055
0
#define LUMA_PU(W, H) \
1056
0
    p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1057
0
    p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg<W, H>; \
1058
0
    p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg<W, H>; \
1059
0
    p.pu[LUMA_ ## W ## x ## H].sad = sad<W, H>; \
1060
0
    p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3<W, H>; \
1061
0
    p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4<W, H>; \
1062
0
    p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp<W, H>; \
1063
0
    p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[ALIGNED] = pixelavg_pp<W, H>;
1064
0
#define LUMA_CU(W, H) \
1065
0
    p.cu[BLOCK_ ## W ## x ## H].sub_ps        = pixel_sub_ps_c<W, H>; \
1066
0
    p.cu[BLOCK_ ## W ## x ## H].add_ps[NONALIGNED]    = pixel_add_ps_c<W, H>; \
1067
0
    p.cu[BLOCK_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>; \
1068
0
    p.cu[BLOCK_ ## W ## x ## H].copy_sp       = blockcopy_sp_c<W, H>; \
1069
0
    p.cu[BLOCK_ ## W ## x ## H].copy_ps       = blockcopy_ps_c<W, H>; \
1070
0
    p.cu[BLOCK_ ## W ## x ## H].copy_ss       = blockcopy_ss_c<W, H>; \
1071
0
    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[NONALIGNED] = blockfill_s_c<W>;  \
1072
0
    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[ALIGNED]    = blockfill_s_c<W>;  \
1073
0
    p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shl = cpy2Dto1D_shl<W>; \
1074
0
    p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shr = cpy2Dto1D_shr<W>; \
1075
0
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[NONALIGNED] = cpy1Dto2D_shl<W>; \
1076
0
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[ALIGNED] = cpy1Dto2D_shl<W>; \
1077
0
    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shr = cpy1Dto2D_shr<W>; \
1078
0
    p.cu[BLOCK_ ## W ## x ## H].psy_cost_pp   = psyCost_pp<BLOCK_ ## W ## x ## H>; \
1079
0
    p.cu[BLOCK_ ## W ## x ## H].transpose     = transpose<W>; \
1080
0
    p.cu[BLOCK_ ## W ## x ## H].ssd_s[NONALIGNED]         = pixel_ssd_s_c<W>; \
1081
0
    p.cu[BLOCK_ ## W ## x ## H].ssd_s[ALIGNED] = pixel_ssd_s_c<W>; \
1082
0
    p.cu[BLOCK_ ## W ## x ## H].var           = pixel_var<W>; \
1083
0
    p.cu[BLOCK_ ## W ## x ## H].calcresidual[NONALIGNED]  = getResidual<W>; \
1084
0
    p.cu[BLOCK_ ## W ## x ## H].calcresidual[ALIGNED]     = getResidual<W>; \
1085
0
    p.cu[BLOCK_ ## W ## x ## H].sse_pp        = sse<W, H, pixel, pixel>; \
1086
0
    p.cu[BLOCK_ ## W ## x ## H].sse_ss        = sse<W, H, int16_t, int16_t>;
1087
1088
0
    LUMA_PU(4, 4);
1089
0
    LUMA_PU(8, 8);
1090
0
    LUMA_PU(16, 16);
1091
0
    LUMA_PU(32, 32);
1092
0
    LUMA_PU(64, 64);
1093
0
    LUMA_PU(4, 8);
1094
0
    LUMA_PU(8, 4);
1095
0
    LUMA_PU(16,  8);
1096
0
    LUMA_PU(8, 16);
1097
0
    LUMA_PU(16, 12);
1098
0
    LUMA_PU(12, 16);
1099
0
    LUMA_PU(16,  4);
1100
0
    LUMA_PU(4, 16);
1101
0
    LUMA_PU(32, 16);
1102
0
    LUMA_PU(16, 32);
1103
0
    LUMA_PU(32, 24);
1104
0
    LUMA_PU(24, 32);
1105
0
    LUMA_PU(32,  8);
1106
0
    LUMA_PU(8, 32);
1107
0
    LUMA_PU(64, 32);
1108
0
    LUMA_PU(32, 64);
1109
0
    LUMA_PU(64, 48);
1110
0
    LUMA_PU(48, 64);
1111
0
    LUMA_PU(64, 16);
1112
0
    LUMA_PU(16, 64);
1113
1114
0
    p.pu[LUMA_4x4].ads = ads_x1<4, 4>;
1115
0
    p.pu[LUMA_8x8].ads = ads_x1<8, 8>;
1116
0
    p.pu[LUMA_8x4].ads = ads_x2<8, 4>;
1117
0
    p.pu[LUMA_4x8].ads = ads_x2<4, 8>;
1118
0
    p.pu[LUMA_16x16].ads = ads_x4<16, 16>;
1119
0
    p.pu[LUMA_16x8].ads = ads_x2<16, 8>;
1120
0
    p.pu[LUMA_8x16].ads = ads_x2<8, 16>;
1121
0
    p.pu[LUMA_16x12].ads = ads_x1<16, 12>;
1122
0
    p.pu[LUMA_12x16].ads = ads_x1<12, 16>;
1123
0
    p.pu[LUMA_16x4].ads = ads_x1<16, 4>;
1124
0
    p.pu[LUMA_4x16].ads = ads_x1<4, 16>;
1125
0
    p.pu[LUMA_32x32].ads = ads_x4<32, 32>;
1126
0
    p.pu[LUMA_32x16].ads = ads_x2<32, 16>;
1127
0
    p.pu[LUMA_16x32].ads = ads_x2<16, 32>;
1128
0
    p.pu[LUMA_32x24].ads = ads_x4<32, 24>;
1129
0
    p.pu[LUMA_24x32].ads = ads_x4<24, 32>;
1130
0
    p.pu[LUMA_32x8].ads = ads_x4<32, 8>;
1131
0
    p.pu[LUMA_8x32].ads = ads_x4<8, 32>;
1132
0
    p.pu[LUMA_64x64].ads = ads_x4<64, 64>;
1133
0
    p.pu[LUMA_64x32].ads = ads_x2<64, 32>;
1134
0
    p.pu[LUMA_32x64].ads = ads_x2<32, 64>;
1135
0
    p.pu[LUMA_64x48].ads = ads_x4<64, 48>;
1136
0
    p.pu[LUMA_48x64].ads = ads_x4<48, 64>;
1137
0
    p.pu[LUMA_64x16].ads = ads_x4<64, 16>;
1138
0
    p.pu[LUMA_16x64].ads = ads_x4<16, 64>;
1139
1140
0
    p.pu[LUMA_4x4].satd   = satd_4x4;
1141
0
    p.pu[LUMA_8x8].satd   = satd8<8, 8>;
1142
0
    p.pu[LUMA_8x4].satd   = satd_8x4;
1143
0
    p.pu[LUMA_4x8].satd   = satd4<4, 8>;
1144
0
    p.pu[LUMA_16x16].satd = satd8<16, 16>;
1145
0
    p.pu[LUMA_16x8].satd  = satd8<16, 8>;
1146
0
    p.pu[LUMA_8x16].satd  = satd8<8, 16>;
1147
0
    p.pu[LUMA_16x12].satd = satd8<16, 12>;
1148
0
    p.pu[LUMA_12x16].satd = satd4<12, 16>;
1149
0
    p.pu[LUMA_16x4].satd  = satd8<16, 4>;
1150
0
    p.pu[LUMA_4x16].satd  = satd4<4, 16>;
1151
0
    p.pu[LUMA_32x32].satd = satd8<32, 32>;
1152
0
    p.pu[LUMA_32x16].satd = satd8<32, 16>;
1153
0
    p.pu[LUMA_16x32].satd = satd8<16, 32>;
1154
0
    p.pu[LUMA_32x24].satd = satd8<32, 24>;
1155
0
    p.pu[LUMA_24x32].satd = satd8<24, 32>;
1156
0
    p.pu[LUMA_32x8].satd  = satd8<32, 8>;
1157
0
    p.pu[LUMA_8x32].satd  = satd8<8, 32>;
1158
0
    p.pu[LUMA_64x64].satd = satd8<64, 64>;
1159
0
    p.pu[LUMA_64x32].satd = satd8<64, 32>;
1160
0
    p.pu[LUMA_32x64].satd = satd8<32, 64>;
1161
0
    p.pu[LUMA_64x48].satd = satd8<64, 48>;
1162
0
    p.pu[LUMA_48x64].satd = satd8<48, 64>;
1163
0
    p.pu[LUMA_64x16].satd = satd8<64, 16>;
1164
0
    p.pu[LUMA_16x64].satd = satd8<16, 64>;
1165
1166
0
    LUMA_CU(4, 4);
1167
0
    LUMA_CU(8, 8);
1168
0
    LUMA_CU(16, 16);
1169
0
    LUMA_CU(32, 32);
1170
0
    LUMA_CU(64, 64);
1171
1172
0
    p.cu[BLOCK_4x4].sa8d   = satd_4x4;
1173
0
    p.cu[BLOCK_8x8].sa8d   = sa8d_8x8;
1174
0
    p.cu[BLOCK_16x16].sa8d = sa8d_16x16;
1175
0
    p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>;
1176
0
    p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>;
1177
1178
0
#define CHROMA_PU_420(W, H) \
1179
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg<W, H>;         \
1180
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg<W, H>;         \
1181
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1182
0
1183
0
    CHROMA_PU_420(2, 2);
1184
0
    CHROMA_PU_420(2, 4);
1185
0
    CHROMA_PU_420(4, 4);
1186
0
    CHROMA_PU_420(8, 8);
1187
0
    CHROMA_PU_420(16, 16);
1188
0
    CHROMA_PU_420(32, 32);
1189
0
    CHROMA_PU_420(4, 2);
1190
0
    CHROMA_PU_420(8, 4);
1191
0
    CHROMA_PU_420(4, 8);
1192
0
    CHROMA_PU_420(8, 6);
1193
0
    CHROMA_PU_420(6, 8);
1194
0
    CHROMA_PU_420(8, 2);
1195
0
    CHROMA_PU_420(2, 8);
1196
0
    CHROMA_PU_420(16, 8);
1197
0
    CHROMA_PU_420(8,  16);
1198
0
    CHROMA_PU_420(16, 12);
1199
0
    CHROMA_PU_420(12, 16);
1200
0
    CHROMA_PU_420(16, 4);
1201
0
    CHROMA_PU_420(4,  16);
1202
0
    CHROMA_PU_420(32, 16);
1203
0
    CHROMA_PU_420(16, 32);
1204
0
    CHROMA_PU_420(32, 24);
1205
0
    CHROMA_PU_420(24, 32);
1206
0
    CHROMA_PU_420(32, 8);
1207
0
    CHROMA_PU_420(8,  32);
1208
1209
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x2].satd   = NULL;
1210
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd   = satd_4x4;
1211
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd   = satd8<8, 8>;
1212
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = satd8<16, 16>;
1213
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = satd8<32, 32>;
1214
1215
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].satd   = NULL;
1216
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].satd   = NULL;
1217
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd   = satd_8x4;
1218
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd   = satd4<4, 8>;
1219
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd  = satd8<16, 8>;
1220
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd  = satd8<8, 16>;
1221
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = satd8<32, 16>;
1222
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = satd8<16, 32>;
1223
1224
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].satd   = NULL;
1225
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].satd   = NULL;
1226
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].satd   = NULL;
1227
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].satd   = NULL;
1228
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = satd4<16, 12>;
1229
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = satd4<12, 16>;
1230
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd  = satd4<16, 4>;
1231
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd  = satd4<4, 16>;
1232
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = satd8<32, 24>;
1233
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = satd8<24, 32>;
1234
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd  = satd8<32, 8>;
1235
0
    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd  = satd8<8, 32>;
1236
1237
0
#define CHROMA_CU_420(W, H) \
1238
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sse_pp  = sse<W, H, pixel, pixel>; \
1239
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
1240
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
1241
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \
1242
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>;  \
1243
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \
1244
0
    p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>;
1245
1246
0
    CHROMA_CU_420(2, 2)
1247
0
    CHROMA_CU_420(4, 4)
1248
0
    CHROMA_CU_420(8, 8)
1249
0
    CHROMA_CU_420(16, 16)
1250
0
    CHROMA_CU_420(32, 32)
1251
1252
0
    p.chroma[X265_CSP_I420].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd;
1253
0
    p.chroma[X265_CSP_I420].cu[BLOCK_16x16].sa8d = sa8d8<8, 8>;
1254
0
    p.chroma[X265_CSP_I420].cu[BLOCK_32x32].sa8d = sa8d16<16, 16>;
1255
0
    p.chroma[X265_CSP_I420].cu[BLOCK_64x64].sa8d = sa8d16<32, 32>;
1256
1257
0
#define CHROMA_PU_422(W, H) \
1258
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg<W, H>;         \
1259
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg<W, H>;         \
1260
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
1261
0
1262
0
    CHROMA_PU_422(2, 4);
1263
0
    CHROMA_PU_422(4, 8);
1264
0
    CHROMA_PU_422(8, 16);
1265
0
    CHROMA_PU_422(16, 32);
1266
0
    CHROMA_PU_422(32, 64);
1267
0
    CHROMA_PU_422(4, 4);
1268
0
    CHROMA_PU_422(2, 8);
1269
0
    CHROMA_PU_422(8, 8);
1270
0
    CHROMA_PU_422(4, 16);
1271
0
    CHROMA_PU_422(8, 12);
1272
0
    CHROMA_PU_422(6, 16);
1273
0
    CHROMA_PU_422(8, 4);
1274
0
    CHROMA_PU_422(2, 16);
1275
0
    CHROMA_PU_422(16, 16);
1276
0
    CHROMA_PU_422(8, 32);
1277
0
    CHROMA_PU_422(16, 24);
1278
0
    CHROMA_PU_422(12, 32);
1279
0
    CHROMA_PU_422(16, 8);
1280
0
    CHROMA_PU_422(4,  32);
1281
0
    CHROMA_PU_422(32, 32);
1282
0
    CHROMA_PU_422(16, 64);
1283
0
    CHROMA_PU_422(32, 48);
1284
0
    CHROMA_PU_422(24, 64);
1285
0
    CHROMA_PU_422(32, 16);
1286
0
    CHROMA_PU_422(8,  64);
1287
1288
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x4].satd   = NULL;
1289
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd   = satd4<4, 8>;
1290
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].satd  = satd8<8, 16>;
1291
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].satd = satd8<16, 32>;
1292
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = satd8<32, 64>;
1293
1294
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd   = satd_4x4;
1295
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].satd   = NULL;
1296
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd   = satd8<8, 8>;
1297
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd  = satd4<4, 16>;
1298
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].satd = satd8<16, 16>;
1299
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].satd  = satd8<8, 32>;
1300
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = satd8<32, 32>;
1301
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].satd = satd8<16, 64>;
1302
1303
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd  = satd4<8, 12>;
1304
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].satd  = NULL;
1305
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd   = satd4<8, 4>;
1306
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].satd  = NULL;
1307
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].satd = satd8<16, 24>;
1308
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = satd4<12, 32>;
1309
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].satd  = satd8<16, 8>;
1310
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd  = satd4<4, 32>;
1311
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].satd = satd8<32, 48>;
1312
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = satd8<24, 64>;
1313
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = satd8<32, 16>;
1314
0
    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd  = satd8<8, 64>;
1315
1316
0
#define CHROMA_CU_422(W, H) \
1317
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sse_pp  = sse<W, H, pixel, pixel>; \
1318
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
1319
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
1320
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \
1321
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
1322
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \
1323
0
    p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>;
1324
1325
0
    CHROMA_CU_422(2, 4)
1326
0
    CHROMA_CU_422(4, 8)
1327
0
    CHROMA_CU_422(8, 16)
1328
0
    CHROMA_CU_422(16, 32)
1329
0
    CHROMA_CU_422(32, 64)
1330
1331
0
    p.chroma[X265_CSP_I422].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd;
1332
0
    p.chroma[X265_CSP_I422].cu[BLOCK_16x16].sa8d = sa8d8<8, 16>;
1333
0
    p.chroma[X265_CSP_I422].cu[BLOCK_32x32].sa8d = sa8d16<16, 32>;
1334
0
    p.chroma[X265_CSP_I422].cu[BLOCK_64x64].sa8d = sa8d16<32, 64>;
1335
1336
0
    p.weight_pp = weight_pp_c;
1337
0
    p.weight_sp = weight_sp_c;
1338
1339
0
    p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64;
1340
0
    p.scale2D_64to32 = scale2D_64to32;
1341
0
    p.frameInitLowres = frame_init_lowres_core;
1342
0
    p.frameInitLowerRes = frame_init_lowres_core;
1343
0
    p.ssim_4x4x2_core = ssim_4x4x2_core;
1344
0
    p.ssim_end_4 = ssim_end_4;
1345
1346
0
    p.planecopy_cp = planecopy_cp_c;
1347
0
    p.planecopy_sp = planecopy_sp_c;
1348
0
    p.planecopy_sp_shl = planecopy_sp_shl_c;
1349
0
    p.planecopy_pp_shr = planecopy_pp_shr_c;
1350
#if HIGH_BIT_DEPTH
1351
    p.planeClipAndMax = planeClipAndMax_c;
1352
#endif
1353
0
    p.propagateCost = estimateCUPropagateCost;
1354
0
    p.fix8Unpack = cuTreeFix8Unpack;
1355
0
    p.fix8Pack = cuTreeFix8Pack;
1356
1357
0
    p.cu[BLOCK_4x4].ssimDist = ssimDist_c<2>;
1358
0
    p.cu[BLOCK_8x8].ssimDist = ssimDist_c<3>;
1359
0
    p.cu[BLOCK_16x16].ssimDist = ssimDist_c<4>;
1360
0
    p.cu[BLOCK_32x32].ssimDist = ssimDist_c<5>;
1361
0
    p.cu[BLOCK_64x64].ssimDist = ssimDist_c<6>;
1362
1363
0
    p.cu[BLOCK_8x8].normFact = normFact_c;
1364
0
    p.cu[BLOCK_16x16].normFact = normFact_c;
1365
0
    p.cu[BLOCK_32x32].normFact = normFact_c;
1366
0
    p.cu[BLOCK_64x64].normFact = normFact_c;
1367
    /* SubSample Luma*/
1368
0
    p.frameSubSampleLuma = frame_subsample_luma;
1369
0
}
1370
}