/src/x265/source/common/pixel.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Mandar Gurav <mandar@multicorewareinc.com> |
6 | | * Mahesh Pittala <mahesh@multicorewareinc.com> |
7 | | * Min Chen <min.chen@multicorewareinc.com> |
8 | | * Hongbin Liu<liuhongbin1@huawei.com> |
9 | | * |
10 | | * This program is free software; you can redistribute it and/or modify |
11 | | * it under the terms of the GNU General Public License as published by |
12 | | * the Free Software Foundation; either version 2 of the License, or |
13 | | * (at your option) any later version. |
14 | | * |
15 | | * This program is distributed in the hope that it will be useful, |
16 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | * GNU General Public License for more details. |
19 | | * |
20 | | * You should have received a copy of the GNU General Public License |
21 | | * along with this program; if not, write to the Free Software |
22 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
23 | | * |
24 | | * This program is also available under a commercial proprietary license. |
25 | | * For more information, contact us at license @ x265.com. |
26 | | *****************************************************************************/ |
27 | | |
28 | | #include "common.h" |
29 | | #include "slicetype.h" // LOWRES_COST_MASK |
30 | | #include "primitives.h" |
31 | | #include "x265.h" |
32 | | |
33 | | #include <cstdlib> // abs() |
34 | | |
35 | | using namespace X265_NS; |
36 | | |
37 | | namespace { |
38 | | // place functions in anonymous namespace (file static) |
39 | | |
/* Sum of absolute differences between two lx-wide, ly-tall pixel blocks.
 *
 * pix1 / stride_pix1: first block and the element distance between its rows
 * pix2 / stride_pix2: second block and the element distance between its rows
 * returns the accumulated |pix1 - pix2| over every sample of the block */
template<int lx, int ly>
int sad(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
{
    int total = 0;

    for (int rowsLeft = ly; rowsLeft > 0; --rowsLeft)
    {
        for (int col = 0; col < lx; ++col)
        {
            // the subtraction is carried out in int, so the difference may be negative
            const int diff = pix1[col] - pix2[col];
            total += abs(diff);
        }

        // step both blocks down one row
        pix1 += stride_pix1;
        pix2 += stride_pix2;
    }

    return total;
}
pixel.cpp:int (anonymous namespace)::sad<8, 8>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 42 | 10.6M | { | 43 | 10.6M | int sum = 0; | 44 | | | 45 | 95.8M | for (int y = 0; y < ly; y++) | 46 | 85.2M | { | 47 | 766M | for (int x = 0; x < lx; x++) | 48 | 681M | sum += abs(pix1[x] - pix2[x]); | 49 | | | 50 | 85.2M | pix1 += stride_pix1; | 51 | 85.2M | pix2 += stride_pix2; | 52 | 85.2M | } | 53 | | | 54 | 10.6M | return sum; | 55 | 10.6M | } |
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 4>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 12>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<12, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 4>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<4, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 24>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<24, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int 
(anonymous namespace)::sad<32, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<8, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<32, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 48>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<48, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<64, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sad<16, 64>(unsigned char const*, long, unsigned char const*, long) |
56 | | |
/* Sum of absolute differences between two lx-wide, ly-tall blocks of int16_t
 * samples.
 *
 * pix1 / stride_pix1: first block and the element distance between its rows
 * pix2 / stride_pix2: second block and the element distance between its rows
 * returns the accumulated |pix1 - pix2| over every sample of the block */
template<int lx, int ly>
int sad(const int16_t* pix1, intptr_t stride_pix1, const int16_t* pix2, intptr_t stride_pix2)
{
    int total = 0;

    for (int rowsLeft = ly; rowsLeft > 0; --rowsLeft)
    {
        for (int col = 0; col < lx; ++col)
        {
            // both operands promote to int before the subtraction
            const int diff = pix1[col] - pix2[col];
            total += abs(diff);
        }

        // step both blocks down one row
        pix1 += stride_pix1;
        pix2 += stride_pix2;
    }

    return total;
}
73 | | |
74 | | template<int lx, int ly> |
75 | | void sad_x3(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, intptr_t frefstride, int32_t* res) |
76 | 0 | { |
77 | 0 | res[0] = 0; |
78 | 0 | res[1] = 0; |
79 | 0 | res[2] = 0; |
80 | 0 | for (int y = 0; y < ly; y++) |
81 | 0 | { |
82 | 0 | for (int x = 0; x < lx; x++) |
83 | 0 | { |
84 | 0 | res[0] += abs(pix1[x] - pix2[x]); |
85 | 0 | res[1] += abs(pix1[x] - pix3[x]); |
86 | 0 | res[2] += abs(pix1[x] - pix4[x]); |
87 | 0 | } |
88 | |
|
89 | 0 | pix1 += FENC_STRIDE; |
90 | 0 | pix2 += frefstride; |
91 | 0 | pix3 += frefstride; |
92 | 0 | pix4 += frefstride; |
93 | 0 | } |
94 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: 
pixel.cpp:void (anonymous namespace)::sad_x3<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous 
namespace)::sad_x3<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x3<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) |
95 | | |
96 | | template<int lx, int ly> |
97 | | void sad_x4(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, const pixel* pix5, intptr_t frefstride, int32_t* res) |
98 | 0 | { |
99 | 0 | res[0] = 0; |
100 | 0 | res[1] = 0; |
101 | 0 | res[2] = 0; |
102 | 0 | res[3] = 0; |
103 | 0 | for (int y = 0; y < ly; y++) |
104 | 0 | { |
105 | 0 | for (int x = 0; x < lx; x++) |
106 | 0 | { |
107 | 0 | res[0] += abs(pix1[x] - pix2[x]); |
108 | 0 | res[1] += abs(pix1[x] - pix3[x]); |
109 | 0 | res[2] += abs(pix1[x] - pix4[x]); |
110 | 0 | res[3] += abs(pix1[x] - pix5[x]); |
111 | 0 | } |
112 | |
|
113 | 0 | pix1 += FENC_STRIDE; |
114 | 0 | pix2 += frefstride; |
115 | 0 | pix3 += frefstride; |
116 | 0 | pix4 += frefstride; |
117 | 0 | pix5 += frefstride; |
118 | 0 | } |
119 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 12>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, 
long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<12, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 4>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<4, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 24>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<24, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 8>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<8, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 32>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char 
const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<32, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 48>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<48, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<64, 16>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::sad_x4<16, 64>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, long, int*) |
120 | | |
/* Absolute-difference-of-sums filter using four sums per position.
 *
 * For every position i in [0, width) the four entries sums[i],
 * sums[i + lx/2], sums[i + delta] and sums[i + delta + lx/2] are compared with
 * encDC[0..3]; the absolute differences plus costMvX[i] form the score.
 * Positions whose score is strictly below thresh are appended to mvs.
 *
 * returns the number of positions written to mvs.
 *
 * The loop counter is a plain int: a 16-bit counter compared against an int
 * width can never reach a width above INT16_MAX and would loop forever.  The
 * score is kept in 64 bits so that neither a 32-bit long nor the narrowing to
 * int can distort the threshold comparison.  mvs stores 16-bit indices, so
 * callers are expected to keep width within the int16_t range. */
template<int lx, int ly>
int ads_x4(int encDC[4], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    for (int i = 0; i < width; i++, sums++)
    {
        const int64_t ads = std::abs(encDC[0] - int64_t(sums[0]))
            + std::abs(encDC[1] - int64_t(sums[lx >> 1]))
            + std::abs(encDC[2] - int64_t(sums[delta]))
            + std::abs(encDC[3] - int64_t(sums[delta + (lx >> 1)]))
            + costMvX[i];

        if (ads < thresh)
            mvs[nmv++] = static_cast<int16_t>(i);
    }

    return nmv;
}
137 | | |
/* Absolute-difference-of-sums filter using two sums per position.
 *
 * For every position i in [0, width) the entries sums[i] and sums[i + delta]
 * are compared with encDC[0] and encDC[1]; the absolute differences plus
 * costMvX[i] form the score.  Positions whose score is strictly below thresh
 * are appended to mvs.
 *
 * returns the number of positions written to mvs.
 *
 * The loop counter is a plain int: a 16-bit counter compared against an int
 * width can never reach a width above INT16_MAX and would loop forever.  The
 * score is kept in 64 bits so that neither a 32-bit long nor the narrowing to
 * int can distort the threshold comparison.  mvs stores 16-bit indices, so
 * callers are expected to keep width within the int16_t range. */
template<int lx, int ly>
int ads_x2(int encDC[2], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    for (int i = 0; i < width; i++, sums++)
    {
        const int64_t ads = std::abs(encDC[0] - int64_t(sums[0]))
            + std::abs(encDC[1] - int64_t(sums[delta]))
            + costMvX[i];

        if (ads < thresh)
            mvs[nmv++] = static_cast<int16_t>(i);
    }

    return nmv;
}
152 | | |
/* Absolute-difference-of-sums filter using a single sum per position.
 *
 * For every position i in [0, width) the score is |encDC[0] - sums[i]| plus
 * costMvX[i].  Positions whose score is strictly below thresh are appended to
 * mvs.  The unnamed int parameter is unused here; it occupies the slot that
 * the x2/x4 variants use for delta.
 *
 * returns the number of positions written to mvs.
 *
 * The loop counter is a plain int: a 16-bit counter compared against an int
 * width can never reach a width above INT16_MAX and would loop forever.  The
 * score is kept in 64 bits so that neither a 32-bit long nor the narrowing to
 * int can distort the threshold comparison.  mvs stores 16-bit indices, so
 * callers are expected to keep width within the int16_t range. */
template<int lx, int ly>
int ads_x1(int encDC[1], uint32_t *sums, int, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
{
    int nmv = 0;

    for (int i = 0; i < width; i++, sums++)
    {
        const int64_t ads = std::abs(encDC[0] - int64_t(sums[0]))
            + costMvX[i];

        if (ads < thresh)
            mvs[nmv++] = static_cast<int16_t>(i);
    }

    return nmv;
}
166 | | |
167 | | template<int lx, int ly, class T1, class T2> |
168 | | sse_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2) |
169 | 13.8M | { |
170 | 13.8M | sse_t sum = 0; |
171 | 13.8M | int tmp; |
172 | | |
173 | 86.5M | for (int y = 0; y < ly; y++) |
174 | 72.7M | { |
175 | 586M | for (int x = 0; x < lx; x++) |
176 | 514M | { |
177 | 514M | tmp = pix1[x] - pix2[x]; |
178 | 514M | sum += (tmp * tmp); |
179 | 514M | } |
180 | | |
181 | 72.7M | pix1 += stride_pix1; |
182 | 72.7M | pix2 += stride_pix2; |
183 | 72.7M | } |
184 | | |
185 | 13.8M | return sum; |
186 | 13.8M | } pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 169 | 10.7M | { | 170 | 10.7M | sse_t sum = 0; | 171 | 10.7M | int tmp; | 172 | | | 173 | 53.8M | for (int y = 0; y < ly; y++) | 174 | 43.0M | { | 175 | 215M | for (int x = 0; x < lx; x++) | 176 | 172M | { | 177 | 172M | tmp = pix1[x] - pix2[x]; | 178 | 172M | sum += (tmp * tmp); | 179 | 172M | } | 180 | | | 181 | 43.0M | pix1 += stride_pix1; | 182 | 43.0M | pix2 += stride_pix2; | 183 | 43.0M | } | 184 | | | 185 | 10.7M | return sum; | 186 | 10.7M | } |
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 4, short, short>(short const*, long, short const*, long) pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 169 | 2.50M | { | 170 | 2.50M | sse_t sum = 0; | 171 | 2.50M | int tmp; | 172 | | | 173 | 22.5M | for (int y = 0; y < ly; y++) | 174 | 20.0M | { | 175 | 180M | for (int x = 0; x < lx; x++) | 176 | 160M | { | 177 | 160M | tmp = pix1[x] - pix2[x]; | 178 | 160M | sum += (tmp * tmp); | 179 | 160M | } | 180 | | | 181 | 20.0M | pix1 += stride_pix1; | 182 | 20.0M | pix2 += stride_pix2; | 183 | 20.0M | } | 184 | | | 185 | 2.50M | return sum; | 186 | 2.50M | } |
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 8, short, short>(short const*, long, short const*, long) pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 169 | 491k | { | 170 | 491k | sse_t sum = 0; | 171 | 491k | int tmp; | 172 | | | 173 | 8.34M | for (int y = 0; y < ly; y++) | 174 | 7.85M | { | 175 | 133M | for (int x = 0; x < lx; x++) | 176 | 125M | { | 177 | 125M | tmp = pix1[x] - pix2[x]; | 178 | 125M | sum += (tmp * tmp); | 179 | 125M | } | 180 | | | 181 | 7.85M | pix1 += stride_pix1; | 182 | 7.85M | pix2 += stride_pix2; | 183 | 7.85M | } | 184 | | | 185 | 491k | return sum; | 186 | 491k | } |
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 16, short, short>(short const*, long, short const*, long) pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 169 | 54.7k | { | 170 | 54.7k | sse_t sum = 0; | 171 | 54.7k | int tmp; | 172 | | | 173 | 1.80M | for (int y = 0; y < ly; y++) | 174 | 1.75M | { | 175 | 57.7M | for (int x = 0; x < lx; x++) | 176 | 55.9M | { | 177 | 55.9M | tmp = pix1[x] - pix2[x]; | 178 | 55.9M | sum += (tmp * tmp); | 179 | 55.9M | } | 180 | | | 181 | 1.75M | pix1 += stride_pix1; | 182 | 1.75M | pix2 += stride_pix2; | 183 | 1.75M | } | 184 | | | 185 | 54.7k | return sum; | 186 | 54.7k | } |
Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 32, short, short>(short const*, long, short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<64, 64, short, short>(short const*, long, short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 2, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<2, 4, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<4, 8, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<8, 16, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<16, 32, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::sse<32, 64, unsigned char, unsigned char>(unsigned char const*, long, unsigned char const*, long) |
187 | | |
// Width in bits of one sum_t.  The SATD helpers below use it as the shift
// distance that places a second value in the upper part of a sum2_t.
#define BITS_PER_SUM (8 * sizeof(sum_t))
189 | | |
// 4-point Hadamard butterfly on sum2_t values:
//   d0 = s0 + s1 + s2 + s3
//   d1 = s0 - s1 + s2 - s3
//   d2 = s0 + s1 - s2 - s3
//   d3 = s0 - s1 - s2 + s3
// The inputs are read into temporaries t0..t3 before any output is written.
// Expands to a braced block, so it is used as a statement.
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \
        sum2_t t0 = s0 + s1; \
        sum2_t t1 = s0 - s1; \
        sum2_t t2 = s2 + s3; \
        sum2_t t3 = s2 - s3; \
        d0 = t0 + t2; \
        d2 = t0 - t2; \
        d1 = t1 + t3; \
        d3 = t1 - t3; \
}
200 | | |
201 | | // in: a pseudo-simd number of the form x+(y<<16) |
202 | | // return: abs(x)+(abs(y)<<16) |
203 | | inline sum2_t abs2(sum2_t a) |
204 | 1.90G | { |
205 | 1.90G | sum2_t s = ((a >> (BITS_PER_SUM - 1)) & (((sum2_t)1 << BITS_PER_SUM) + 1)) * ((sum_t)-1); |
206 | | |
207 | 1.90G | return (a + s) ^ s; |
208 | 1.90G | } |
209 | | |
// 4x4 SATD: applies a 4x4 Hadamard transform to the difference pix1 - pix2,
// sums the absolute values of the transformed coefficients and returns half
// of that sum.  Two values are carried per sum2_t (one in the low part, one
// shifted up by BITS_PER_SUM) so each arithmetic step handles a pair at once.
static int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
{
    sum2_t tmp[4][2];
    sum2_t a0, a1, a2, a3, b0, b1;
    sum2_t sum = 0;

    // horizontal pass: one row of four differences per iteration
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
    {
        a0 = pix1[0] - pix2[0];
        a1 = pix1[1] - pix2[1];
        // pack (a0 + a1) in the low part and (a0 - a1) in the high part
        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
        a2 = pix1[2] - pix2[2];
        a3 = pix1[3] - pix2[3];
        // same packing for the second pair of columns
        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
        // second butterfly stage; each tmp entry holds two row coefficients
        tmp[i][0] = b0 + b1;
        tmp[i][1] = b0 - b1;
    }

    // vertical pass: transform down the four rows, two packed columns at a time
    for (int i = 0; i < 2; i++)
    {
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
        // fold the low and high packed totals together
        sum += ((sum_t)a0) + (a0 >> BITS_PER_SUM);
    }

    return (int)(sum >> 1);
}
237 | | |
// x264's SWAR version of satd 8x4, performs two 4x4 SATDs at once
// Columns 0-3 travel in the low part of each sum2_t and columns 4-7 in the
// part shifted up by BITS_PER_SUM.  Returns half of the combined sum of
// absolute transformed differences of both 4x4 halves.
static int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
{
    sum2_t tmp[4][4];
    sum2_t a0, a1, a2, a3;
    sum2_t sum = 0;

    // horizontal pass: pair column k with column k + 4, then transform the row
    for (int i = 0; i < 4; i++, pix1 += stride_pix1, pix2 += stride_pix2)
    {
        a0 = (pix1[0] - pix2[0]) + ((sum2_t)(pix1[4] - pix2[4]) << BITS_PER_SUM);
        a1 = (pix1[1] - pix2[1]) + ((sum2_t)(pix1[5] - pix2[5]) << BITS_PER_SUM);
        a2 = (pix1[2] - pix2[2]) + ((sum2_t)(pix1[6] - pix2[6]) << BITS_PER_SUM);
        a3 = (pix1[3] - pix2[3]) + ((sum2_t)(pix1[7] - pix2[7]) << BITS_PER_SUM);
        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
    }

    // vertical pass: transform down the rows and accumulate packed absolute values
    for (int i = 0; i < 4; i++)
    {
        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }

    // add the low and high packed totals, then halve
    return (((sum_t)sum) + (sum >> BITS_PER_SUM)) >> 1;
}
262 | | |
263 | | template<int w, int h> |
264 | | // calculate satd in blocks of 4x4 |
265 | | int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) |
266 | 0 | { |
267 | 0 | int satd = 0; |
268 | |
|
269 | | #if ENABLE_ASSEMBLY && X265_ARCH_ARM64 |
270 | | pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon; |
271 | | #endif |
272 | |
|
273 | 0 | for (int row = 0; row < h; row += 4) |
274 | 0 | for (int col = 0; col < w; col += 4) |
275 | 0 | satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1, |
276 | 0 | pix2 + row * stride_pix2 + col, stride_pix2); |
277 | |
|
278 | 0 | return satd; |
279 | 0 | } Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 12>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<16, 4>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 12>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<8, 4>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<12, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd4<4, 32>(unsigned char const*, long, unsigned char const*, long) |
280 | | |
281 | | template<int w, int h> |
282 | | // calculate satd in blocks of 8x4 |
283 | | int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) |
284 | 1.05M | { |
285 | 1.05M | int satd = 0; |
286 | | |
287 | | #if ENABLE_ASSEMBLY && X265_ARCH_ARM64 |
288 | | pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; |
289 | | #endif |
290 | | |
291 | 3.15M | for (int row = 0; row < h; row += 4) |
292 | 4.20M | for (int col = 0; col < w; col += 8) |
293 | 2.10M | satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, |
294 | 2.10M | pix2 + row * stride_pix2 + col, stride_pix2); |
295 | | |
296 | 1.05M | return satd; |
297 | 1.05M | } pixel.cpp:int (anonymous namespace)::satd8<8, 8>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 284 | 1.05M | { | 285 | 1.05M | int satd = 0; | 286 | | | 287 | | #if ENABLE_ASSEMBLY && X265_ARCH_ARM64 | 288 | | pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; | 289 | | #endif | 290 | | | 291 | 3.15M | for (int row = 0; row < h; row += 4) | 292 | 4.20M | for (int col = 0; col < w; col += 8) | 293 | 2.10M | satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, | 294 | 2.10M | pix2 + row * stride_pix2 + col, stride_pix2); | 295 | | | 296 | 1.05M | return satd; | 297 | 1.05M | } |
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 12>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 4>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 24>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 64>(unsigned char const*, long, unsigned char const*, long) 
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 48>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<48, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<64, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<16, 24>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<32, 48>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<24, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::satd8<8, 64>(unsigned char const*, long, unsigned char const*, long) |
298 | | |
299 | | inline int _sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2) |
300 | 42.7M | { |
301 | 42.7M | sum2_t tmp[8][4]; |
302 | 42.7M | sum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3; |
303 | 42.7M | sum2_t sum = 0; |
304 | | |
305 | 381M | for (int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2) |
306 | 339M | { |
307 | 339M | a0 = pix1[0] - pix2[0]; |
308 | 339M | a1 = pix1[1] - pix2[1]; |
309 | 339M | b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM); |
310 | 339M | a2 = pix1[2] - pix2[2]; |
311 | 339M | a3 = pix1[3] - pix2[3]; |
312 | 339M | b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM); |
313 | 339M | a4 = pix1[4] - pix2[4]; |
314 | 339M | a5 = pix1[5] - pix2[5]; |
315 | 339M | b2 = (a4 + a5) + ((a4 - a5) << BITS_PER_SUM); |
316 | 339M | a6 = pix1[6] - pix2[6]; |
317 | 339M | a7 = pix1[7] - pix2[7]; |
318 | 339M | b3 = (a6 + a7) + ((a6 - a7) << BITS_PER_SUM); |
319 | 339M | HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0, b1, b2, b3); |
320 | 339M | } |
321 | | |
322 | 214M | for (int i = 0; i < 4; i++) |
323 | 171M | { |
324 | 171M | HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]); |
325 | 171M | HADAMARD4(a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i]); |
326 | 171M | b0 = abs2(a0 + a4) + abs2(a0 - a4); |
327 | 171M | b0 += abs2(a1 + a5) + abs2(a1 - a5); |
328 | 171M | b0 += abs2(a2 + a6) + abs2(a2 - a6); |
329 | 171M | b0 += abs2(a3 + a7) + abs2(a3 - a7); |
330 | 171M | sum += (sum_t)b0 + (b0 >> BITS_PER_SUM); |
331 | 171M | } |
332 | | |
333 | 42.7M | return (int)sum; |
334 | 42.7M | } |
335 | | |
336 | | inline int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2) |
337 | 22.3M | { |
338 | 22.3M | return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2); |
339 | 22.3M | } |
340 | | |
341 | | static int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2) |
342 | 5.27M | { |
343 | 5.27M | int sum = _sa8d_8x8(pix1, i_pix1, pix2, i_pix2) |
344 | 5.27M | + _sa8d_8x8(pix1 + 8, i_pix1, pix2 + 8, i_pix2) |
345 | 5.27M | + _sa8d_8x8(pix1 + 8 * i_pix1, i_pix1, pix2 + 8 * i_pix2, i_pix2) |
346 | 5.27M | + _sa8d_8x8(pix1 + 8 + 8 * i_pix1, i_pix1, pix2 + 8 + 8 * i_pix2, i_pix2); |
347 | | |
348 | | // This matches x264 sa8d_16x16, but is slightly different from HM's behavior because |
349 | | // this version only rounds once at the end |
350 | 5.27M | return (sum + 2) >> 2; |
351 | 5.27M | } |
352 | | |
353 | | template<int w, int h> |
354 | | // Calculate sa8d in blocks of 8x8 |
355 | | int sa8d8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2) |
356 | 0 | { |
357 | 0 | int cost = 0; |
358 | |
|
359 | 0 | for (int y = 0; y < h; y += 8) |
360 | 0 | for (int x = 0; x < w; x += 8) |
361 | 0 | cost += sa8d_8x8(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2); |
362 | |
|
363 | 0 | return cost; |
364 | 0 | } Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 8>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d8<8, 16>(unsigned char const*, long, unsigned char const*, long) |
365 | | |
366 | | template<int w, int h> |
367 | | // Calculate sa8d in blocks of 16x16 |
368 | | int sa8d16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2) |
369 | 606k | { |
370 | 606k | int cost = 0; |
371 | | |
372 | 1.81M | for (int y = 0; y < h; y += 16) |
373 | 3.63M | for (int x = 0; x < w; x += 16) |
374 | 2.42M | cost += sa8d_16x16(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2); |
375 | | |
376 | 606k | return cost; |
377 | 606k | } pixel.cpp:int (anonymous namespace)::sa8d16<32, 32>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 369 | 606k | { | 370 | 606k | int cost = 0; | 371 | | | 372 | 1.81M | for (int y = 0; y < h; y += 16) | 373 | 3.63M | for (int x = 0; x < w; x += 16) | 374 | 2.42M | cost += sa8d_16x16(pix1 + i_pix1 * y + x, i_pix1, pix2 + i_pix2 * y + x, i_pix2); | 375 | | | 376 | 606k | return cost; | 377 | 606k | } |
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<64, 64>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 16>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<16, 32>(unsigned char const*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::sa8d16<32, 64>(unsigned char const*, long, unsigned char const*, long) |
378 | | |
379 | | template<int size> |
380 | | sse_t pixel_ssd_s_c(const int16_t* a, intptr_t dstride) |
381 | 0 | { |
382 | 0 | sse_t sum = 0; |
383 | 0 | for (int y = 0; y < size; y++) |
384 | 0 | { |
385 | 0 | for (int x = 0; x < size; x++) |
386 | 0 | sum += a[x] * a[x]; |
387 | |
|
388 | 0 | a += dstride; |
389 | 0 | } |
390 | 0 | return sum; |
391 | 0 | } Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<4>(short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<8>(short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<16>(short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<32>(short const*, long) Unexecuted instantiation: pixel.cpp:unsigned int (anonymous namespace)::pixel_ssd_s_c<64>(short const*, long) |
392 | | |
// Set every element of a size x size block of int16_t to val; dstride is the
// distance, in elements, between consecutive rows of dst. Elements between
// the end of one row and the start of the next are left untouched.
template<int size>
void blockfill_s_c(int16_t* dst, intptr_t dstride, int16_t val)
{
    for (int y = 0; y < size; y++)
    {
        int16_t* row = dst + y * dstride;

        for (int x = 0; x < size; x++)
            row[x] = val;
    }
}
pixel.cpp:void (anonymous namespace)::blockfill_s_c<8>(short*, long, short) Line | Count | Source | 395 | 10.4k | { | 396 | 93.8k | for (int y = 0; y < size; y++) | 397 | 751k | for (int x = 0; x < size; x++) | 398 | 667k | dst[y * dstride + x] = val; | 399 | 10.4k | } |
pixel.cpp:void (anonymous namespace)::blockfill_s_c<16>(short*, long, short) Line | Count | Source | 395 | 9.79k | { | 396 | 166k | for (int y = 0; y < size; y++) | 397 | 2.66M | for (int x = 0; x < size; x++) | 398 | 2.50M | dst[y * dstride + x] = val; | 399 | 9.79k | } |
pixel.cpp:void (anonymous namespace)::blockfill_s_c<32>(short*, long, short) Line | Count | Source | 395 | 2.61k | { | 396 | 86.1k | for (int y = 0; y < size; y++) | 397 | 2.75M | for (int x = 0; x < size; x++) | 398 | 2.67M | dst[y * dstride + x] = val; | 399 | 2.61k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockfill_s_c<64>(short*, long, short) |
400 | | |
401 | | template<int size> |
402 | | void cpy2Dto1D_shl(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift) |
403 | 0 | { |
404 | 0 | X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n"); |
405 | 0 | X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n"); |
406 | 0 | X265_CHECK(shift >= 0, "invalid shift\n"); |
407 | |
|
408 | 0 | for (int i = 0; i < size; i++) |
409 | 0 | { |
410 | 0 | for (int j = 0; j < size; j++) |
411 | 0 | dst[j] = src[j] << shift; |
412 | |
|
413 | 0 | src += srcStride; |
414 | 0 | dst += size; |
415 | 0 | } |
416 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<4>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<8>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<16>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<32>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shl<64>(short*, short const*, long, int) |
417 | | |
418 | | template<int size> |
419 | | void cpy2Dto1D_shr(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift) |
420 | 0 | { |
421 | 0 | X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n"); |
422 | 0 | X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n"); |
423 | 0 | X265_CHECK(shift > 0, "invalid shift\n"); |
424 | |
|
425 | 0 | int16_t round = 1 << (shift - 1); |
426 | 0 | for (int i = 0; i < size; i++) |
427 | 0 | { |
428 | 0 | for (int j = 0; j < size; j++) |
429 | 0 | dst[j] = (src[j] + round) >> shift; |
430 | |
|
431 | 0 | src += srcStride; |
432 | 0 | dst += size; |
433 | 0 | } |
434 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<4>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<8>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<16>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<32>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy2Dto1D_shr<64>(short*, short const*, long, int) |
435 | | |
436 | | template<int size> |
437 | | void cpy1Dto2D_shl(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift) |
438 | 14.1k | { |
439 | 14.1k | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); |
440 | 14.1k | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); |
441 | 14.1k | X265_CHECK(shift >= 0, "invalid shift\n"); |
442 | | |
443 | 139k | for (int i = 0; i < size; i++) |
444 | 125k | { |
445 | 2.16M | for (int j = 0; j < size; j++) |
446 | 2.04M | dst[j] = src[j] << shift; |
447 | | |
448 | 125k | src += size; |
449 | 125k | dst += dstStride; |
450 | 125k | } |
451 | 14.1k | } pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<4>(short*, short const*, long, int) Line | Count | Source | 438 | 7.75k | { | 439 | 7.75k | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); | 440 | 7.75k | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); | 441 | 7.75k | X265_CHECK(shift >= 0, "invalid shift\n"); | 442 | | | 443 | 38.7k | for (int i = 0; i < size; i++) | 444 | 31.0k | { | 445 | 155k | for (int j = 0; j < size; j++) | 446 | 124k | dst[j] = src[j] << shift; | 447 | | | 448 | 31.0k | src += size; | 449 | 31.0k | dst += dstStride; | 450 | 31.0k | } | 451 | 7.75k | } |
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<8>(short*, short const*, long, int) Line | Count | Source | 438 | 3.49k | { | 439 | 3.49k | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); | 440 | 3.49k | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); | 441 | 3.49k | X265_CHECK(shift >= 0, "invalid shift\n"); | 442 | | | 443 | 31.4k | for (int i = 0; i < size; i++) | 444 | 27.9k | { | 445 | 251k | for (int j = 0; j < size; j++) | 446 | 223k | dst[j] = src[j] << shift; | 447 | | | 448 | 27.9k | src += size; | 449 | 27.9k | dst += dstStride; | 450 | 27.9k | } | 451 | 3.49k | } |
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<16>(short*, short const*, long, int) Line | Count | Source | 438 | 1.69k | { | 439 | 1.69k | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); | 440 | 1.69k | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); | 441 | 1.69k | X265_CHECK(shift >= 0, "invalid shift\n"); | 442 | | | 443 | 28.7k | for (int i = 0; i < size; i++) | 444 | 27.1k | { | 445 | 460k | for (int j = 0; j < size; j++) | 446 | 433k | dst[j] = src[j] << shift; | 447 | | | 448 | 27.1k | src += size; | 449 | 27.1k | dst += dstStride; | 450 | 27.1k | } | 451 | 1.69k | } |
pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<32>(short*, short const*, long, int) Line | Count | Source | 438 | 1.23k | { | 439 | 1.23k | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); | 440 | 1.23k | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); | 441 | 1.23k | X265_CHECK(shift >= 0, "invalid shift\n"); | 442 | | | 443 | 40.6k | for (int i = 0; i < size; i++) | 444 | 39.4k | { | 445 | 1.30M | for (int j = 0; j < size; j++) | 446 | 1.26M | dst[j] = src[j] << shift; | 447 | | | 448 | 39.4k | src += size; | 449 | 39.4k | dst += dstStride; | 450 | 39.4k | } | 451 | 1.23k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shl<64>(short*, short const*, long, int) |
452 | | |
453 | | template<int size> |
454 | | void cpy1Dto2D_shr(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift) |
455 | 0 | { |
456 | 0 | X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n"); |
457 | 0 | X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n"); |
458 | 0 | X265_CHECK(shift > 0, "invalid shift\n"); |
459 | |
|
460 | 0 | int16_t round = 1 << (shift - 1); |
461 | 0 | for (int i = 0; i < size; i++) |
462 | 0 | { |
463 | 0 | for (int j = 0; j < size; j++) |
464 | 0 | dst[j] = (src[j] + round) >> shift; |
465 | |
|
466 | 0 | src += size; |
467 | 0 | dst += dstStride; |
468 | 0 | } |
469 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<4>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<8>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<16>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<32>(short*, short const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::cpy1Dto2D_shr<64>(short*, short const*, long, int) |
470 | | |
471 | | template<int blockSize> |
472 | | void getResidual(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride) |
473 | 13.0M | { |
474 | 78.5M | for (int y = 0; y < blockSize; y++) |
475 | 65.4M | { |
476 | 497M | for (int x = 0; x < blockSize; x++) |
477 | 432M | residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]); |
478 | | |
479 | 65.4M | fenc += stride; |
480 | 65.4M | residual += stride; |
481 | 65.4M | pred += stride; |
482 | 65.4M | } |
483 | 13.0M | } pixel.cpp:void (anonymous namespace)::getResidual<4>(unsigned char const*, unsigned char const*, short*, long) Line | Count | Source | 473 | 10.7M | { | 474 | 53.8M | for (int y = 0; y < blockSize; y++) | 475 | 43.1M | { | 476 | 215M | for (int x = 0; x < blockSize; x++) | 477 | 172M | residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]); | 478 | | | 479 | 43.1M | fenc += stride; | 480 | 43.1M | residual += stride; | 481 | 43.1M | pred += stride; | 482 | 43.1M | } | 483 | 10.7M | } |
pixel.cpp:void (anonymous namespace)::getResidual<8>(unsigned char const*, unsigned char const*, short*, long) Line | Count | Source | 473 | 1.82M | { | 474 | 16.4M | for (int y = 0; y < blockSize; y++) | 475 | 14.6M | { | 476 | 131M | for (int x = 0; x < blockSize; x++) | 477 | 116M | residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]); | 478 | | | 479 | 14.6M | fenc += stride; | 480 | 14.6M | residual += stride; | 481 | 14.6M | pred += stride; | 482 | 14.6M | } | 483 | 1.82M | } |
pixel.cpp:void (anonymous namespace)::getResidual<16>(unsigned char const*, unsigned char const*, short*, long) Line | Count | Source | 473 | 409k | { | 474 | 6.95M | for (int y = 0; y < blockSize; y++) | 475 | 6.54M | { | 476 | 111M | for (int x = 0; x < blockSize; x++) | 477 | 104M | residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]); | 478 | | | 479 | 6.54M | fenc += stride; | 480 | 6.54M | residual += stride; | 481 | 6.54M | pred += stride; | 482 | 6.54M | } | 483 | 409k | } |
pixel.cpp:void (anonymous namespace)::getResidual<32>(unsigned char const*, unsigned char const*, short*, long) Line | Count | Source | 473 | 37.4k | { | 474 | 1.23M | for (int y = 0; y < blockSize; y++) | 475 | 1.19M | { | 476 | 39.5M | for (int x = 0; x < blockSize; x++) | 477 | 38.3M | residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]); | 478 | | | 479 | 1.19M | fenc += stride; | 480 | 1.19M | residual += stride; | 481 | 1.19M | pred += stride; | 482 | 1.19M | } | 483 | 37.4k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::getResidual<64>(unsigned char const*, unsigned char const*, short*, long) |
484 | | |
485 | | template<int blockSize> |
486 | | void transpose(pixel* dst, const pixel* src, intptr_t stride) |
487 | 0 | { |
488 | 0 | for (int k = 0; k < blockSize; k++) |
489 | 0 | for (int l = 0; l < blockSize; l++) |
490 | 0 | dst[k * blockSize + l] = src[l * stride + k]; |
491 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<4>(unsigned char*, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<8>(unsigned char*, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<16>(unsigned char*, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<32>(unsigned char*, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::transpose<64>(unsigned char*, unsigned char const*, long) |
492 | | |
493 | | static void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset) |
494 | 0 | { |
495 | 0 | int x, y; |
496 | |
|
497 | | #if CHECKED_BUILD || _DEBUG |
498 | | const int correction = (IF_INTERNAL_PREC - X265_DEPTH); |
499 | | X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n"); |
500 | | X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n"); |
501 | | X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n"); |
502 | | #endif |
503 | |
|
504 | 0 | for (y = 0; y <= height - 1; y++) |
505 | 0 | { |
506 | 0 | for (x = 0; x <= width - 1; ) |
507 | 0 | { |
508 | | // note: width can be odd |
509 | 0 | dst[x] = x265_clip(((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset); |
510 | 0 | x++; |
511 | 0 | } |
512 | |
|
513 | 0 | src += srcStride; |
514 | 0 | dst += dstStride; |
515 | 0 | } |
516 | 0 | } |
517 | | |
518 | | static void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset) |
519 | 0 | { |
520 | 0 | int x, y; |
521 | |
|
522 | 0 | const int correction = (IF_INTERNAL_PREC - X265_DEPTH); |
523 | |
|
524 | 0 | X265_CHECK(!(width & 15), "weightp alignment error\n"); |
525 | 0 | X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n"); |
526 | 0 | X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n"); |
527 | 0 | X265_CHECK((shift >= correction), "shift must be include factor correction, please update ASM ABI\n"); |
528 | 0 | X265_CHECK(!(round & ((1 << correction) - 1)), "round must be include factor correction, please update ASM ABI\n"); |
529 | |
|
530 | 0 | for (y = 0; y <= height - 1; y++) |
531 | 0 | { |
532 | 0 | for (x = 0; x <= width - 1; ) |
533 | 0 | { |
534 | | // simulating pixel to short conversion |
535 | 0 | int16_t val = src[x] << correction; |
536 | 0 | dst[x] = x265_clip(((w0 * (val) + round) >> shift) + offset); |
537 | 0 | x++; |
538 | 0 | } |
539 | |
|
540 | 0 | src += stride; |
541 | 0 | dst += stride; |
542 | 0 | } |
543 | 0 | } |
544 | | |
545 | | template<int lx, int ly> |
546 | | void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int) |
547 | 0 | { |
548 | 0 | for (int y = 0; y < ly; y++) |
549 | 0 | { |
550 | 0 | for (int x = 0; x < lx; x++) |
551 | 0 | dst[x] = (src0[x] + src1[x] + 1) >> 1; |
552 | |
|
553 | 0 | src0 += sstride0; |
554 | 0 | src1 += sstride1; |
555 | 0 | dst += dstride; |
556 | 0 | } |
557 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 12>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<12, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 4>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, 
int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<4, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 24>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<24, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 8>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<8, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 32>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<32, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 48>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<48, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<64, 16>(unsigned char*, long, unsigned char const*, long, unsigned char const*, 
long, int) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixelavg_pp<16, 64>(unsigned char*, long, unsigned char const*, long, unsigned char const*, long, int) |
558 | | |
559 | | static void scale1D_128to64(pixel *dst, const pixel *src) |
560 | 0 | { |
561 | 0 | int x; |
562 | 0 | const pixel* src1 = src; |
563 | 0 | const pixel* src2 = src + 128; |
564 | |
|
565 | 0 | pixel* dst1 = dst; |
566 | 0 | pixel* dst2 = dst + 64/*128*/; |
567 | |
|
568 | 0 | for (x = 0; x < 128; x += 2) |
569 | 0 | { |
570 | | // Top pixel |
571 | 0 | pixel pix0 = src1[(x + 0)]; |
572 | 0 | pixel pix1 = src1[(x + 1)]; |
573 | | |
574 | | // Left pixel |
575 | 0 | pixel pix2 = src2[(x + 0)]; |
576 | 0 | pixel pix3 = src2[(x + 1)]; |
577 | 0 | int sum1 = pix0 + pix1; |
578 | 0 | int sum2 = pix2 + pix3; |
579 | |
|
580 | 0 | dst1[x >> 1] = (pixel)((sum1 + 1) >> 1); |
581 | 0 | dst2[x >> 1] = (pixel)((sum2 + 1) >> 1); |
582 | 0 | } |
583 | 0 | } |
584 | | |
585 | | static void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride) |
586 | 0 | { |
587 | 0 | uint32_t x, y; |
588 | |
|
589 | 0 | for (y = 0; y < 64; y += 2) |
590 | 0 | { |
591 | 0 | for (x = 0; x < 64; x += 2) |
592 | 0 | { |
593 | 0 | pixel pix0 = src[(y + 0) * stride + (x + 0)]; |
594 | 0 | pixel pix1 = src[(y + 0) * stride + (x + 1)]; |
595 | 0 | pixel pix2 = src[(y + 1) * stride + (x + 0)]; |
596 | 0 | pixel pix3 = src[(y + 1) * stride + (x + 1)]; |
597 | 0 | int sum = pix0 + pix1 + pix2 + pix3; |
598 | |
|
599 | 0 | dst[y / 2 * 32 + x / 2] = (pixel)((sum + 2) >> 2); |
600 | 0 | } |
601 | 0 | } |
602 | 0 | } |
603 | | |
604 | | static |
605 | | void frame_init_lowres_core(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc, |
606 | | intptr_t src_stride, intptr_t dst_stride, int width, int height) |
607 | 698 | { |
608 | 61.1k | for (int y = 0; y < height; y++) |
609 | 60.4k | { |
610 | 60.4k | const pixel* src1 = src0 + src_stride; |
611 | 60.4k | const pixel* src2 = src1 + src_stride; |
612 | 5.67M | for (int x = 0; x < width; x++) |
613 | 5.61M | { |
614 | | // slower than naive bilinear, but matches asm |
615 | 22.4M | #define FILTER(a, b, c, d) ((((a + b + 1) >> 1) + ((c + d + 1) >> 1) + 1) >> 1) |
616 | 5.61M | dst0[x] = FILTER(src0[2 * x], src1[2 * x], src0[2 * x + 1], src1[2 * x + 1]); |
617 | 5.61M | dsth[x] = FILTER(src0[2 * x + 1], src1[2 * x + 1], src0[2 * x + 2], src1[2 * x + 2]); |
618 | 5.61M | dstv[x] = FILTER(src1[2 * x], src2[2 * x], src1[2 * x + 1], src2[2 * x + 1]); |
619 | 5.61M | dstc[x] = FILTER(src1[2 * x + 1], src2[2 * x + 1], src1[2 * x + 2], src2[2 * x + 2]); |
620 | 5.61M | #undef FILTER |
621 | 5.61M | } |
622 | 60.4k | src0 += src_stride * 2; |
623 | 60.4k | dst0 += dst_stride; |
624 | 60.4k | dsth += dst_stride; |
625 | 60.4k | dstv += dst_stride; |
626 | 60.4k | dstc += dst_stride; |
627 | 60.4k | } |
628 | 698 | } |
629 | | |
630 | | /* structural similarity metric */ |
631 | | static void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]) |
632 | 0 | { |
633 | 0 | for (int z = 0; z < 2; z++) |
634 | 0 | { |
635 | 0 | uint32_t s1 = 0, s2 = 0, ss = 0, s12 = 0; |
636 | 0 | for (int y = 0; y < 4; y++) |
637 | 0 | { |
638 | 0 | for (int x = 0; x < 4; x++) |
639 | 0 | { |
640 | 0 | int a = pix1[x + y * stride1]; |
641 | 0 | int b = pix2[x + y * stride2]; |
642 | 0 | s1 += a; |
643 | 0 | s2 += b; |
644 | 0 | ss += a * a; |
645 | 0 | ss += b * b; |
646 | 0 | s12 += a * b; |
647 | 0 | } |
648 | 0 | } |
649 | |
|
650 | 0 | sums[z][0] = s1; |
651 | 0 | sums[z][1] = s2; |
652 | 0 | sums[z][2] = ss; |
653 | 0 | sums[z][3] = s12; |
654 | 0 | pix1 += 4; |
655 | 0 | pix2 += 4; |
656 | 0 | } |
657 | 0 | } |
658 | | |
659 | | static float ssim_end_1(int s1, int s2, int ss, int s12) |
660 | 0 | { |
661 | | /* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases. |
662 | | * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784. |
663 | | * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */ |
664 | |
|
665 | | #if HIGH_BIT_DEPTH |
666 | | X265_CHECK((X265_DEPTH == 10) || (X265_DEPTH == 12), "ssim invalid depth\n"); |
667 | | #define type float |
668 | | static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64); |
669 | | static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63); |
670 | | #else |
671 | 0 | X265_CHECK(X265_DEPTH == 8, "ssim invalid depth\n"); |
672 | 0 | #define type int |
673 | 0 | static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5); |
674 | 0 | static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5); |
675 | 0 | #endif |
676 | 0 | type fs1 = (type)s1; |
677 | 0 | type fs2 = (type)s2; |
678 | 0 | type fss = (type)ss; |
679 | 0 | type fs12 = (type)s12; |
680 | 0 | type vars = (type)(fss * 64 - fs1 * fs1 - fs2 * fs2); |
681 | 0 | type covar = (type)(fs12 * 64 - fs1 * fs2); |
682 | 0 | return (float)(2 * fs1 * fs2 + ssim_c1) * (float)(2 * covar + ssim_c2) |
683 | 0 | / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars + ssim_c2)); |
684 | 0 | #undef type |
685 | 0 | #undef PIXEL_MAX |
686 | 0 | } |
687 | | |
688 | | static float ssim_end_4(int sum0[5][4], int sum1[5][4], int width) |
689 | 0 | { |
690 | 0 | float ssim = 0.0; |
691 | |
|
692 | 0 | for (int i = 0; i < width; i++) |
693 | 0 | { |
694 | 0 | ssim += ssim_end_1(sum0[i][0] + sum0[i + 1][0] + sum1[i][0] + sum1[i + 1][0], |
695 | 0 | sum0[i][1] + sum0[i + 1][1] + sum1[i][1] + sum1[i + 1][1], |
696 | 0 | sum0[i][2] + sum0[i + 1][2] + sum1[i][2] + sum1[i + 1][2], |
697 | 0 | sum0[i][3] + sum0[i + 1][3] + sum1[i][3] + sum1[i + 1][3]); |
698 | 0 | } |
699 | |
|
700 | 0 | return ssim; |
701 | 0 | } |
702 | | |
703 | | template<int size> |
704 | | uint64_t pixel_var(const pixel* pix, intptr_t i_stride) |
705 | 181k | { |
706 | 181k | uint32_t sum = 0, sqr = 0; |
707 | | |
708 | 2.11M | for (int y = 0; y < size; y++) |
709 | 1.93M | { |
710 | 25.1M | for (int x = 0; x < size; x++) |
711 | 23.1M | { |
712 | 23.1M | sum += pix[x]; |
713 | 23.1M | sqr += pix[x] * pix[x]; |
714 | 23.1M | } |
715 | | |
716 | 1.93M | pix += i_stride; |
717 | 1.93M | } |
718 | | |
719 | 181k | return sum + ((uint64_t)sqr << 32); |
720 | 181k | } Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<4>(unsigned char const*, long) pixel.cpp:unsigned long (anonymous namespace)::pixel_var<8>(unsigned char const*, long) Line | Count | Source | 705 | 120k | { | 706 | 120k | uint32_t sum = 0, sqr = 0; | 707 | | | 708 | 1.08M | for (int y = 0; y < size; y++) | 709 | 966k | { | 710 | 8.69M | for (int x = 0; x < size; x++) | 711 | 7.72M | { | 712 | 7.72M | sum += pix[x]; | 713 | 7.72M | sqr += pix[x] * pix[x]; | 714 | 7.72M | } | 715 | | | 716 | 966k | pix += i_stride; | 717 | 966k | } | 718 | | | 719 | 120k | return sum + ((uint64_t)sqr << 32); | 720 | 120k | } |
pixel.cpp:unsigned long (anonymous namespace)::pixel_var<16>(unsigned char const*, long) Line | Count | Source | 705 | 60.3k | { | 706 | 60.3k | uint32_t sum = 0, sqr = 0; | 707 | | | 708 | 1.02M | for (int y = 0; y < size; y++) | 709 | 966k | { | 710 | 16.4M | for (int x = 0; x < size; x++) | 711 | 15.4M | { | 712 | 15.4M | sum += pix[x]; | 713 | 15.4M | sqr += pix[x] * pix[x]; | 714 | 15.4M | } | 715 | | | 716 | 966k | pix += i_stride; | 717 | 966k | } | 718 | | | 719 | 60.3k | return sum + ((uint64_t)sqr << 32); | 720 | 60.3k | } |
Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<32>(unsigned char const*, long) Unexecuted instantiation: pixel.cpp:unsigned long (anonymous namespace)::pixel_var<64>(unsigned char const*, long) |
721 | | |
722 | | #if defined(_MSC_VER) |
723 | | #pragma warning(disable: 4127) // conditional expression is constant |
724 | | #endif |
725 | | |
726 | | template<int size> |
727 | | int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) |
728 | 13.8M | { |
729 | 13.8M | static pixel zeroBuf[8] /* = { 0 } */; |
730 | | |
731 | 13.8M | if (size) |
732 | 3.04M | { |
733 | 3.04M | int dim = 1 << (size + 2); |
734 | 3.04M | uint32_t totEnergy = 0; |
735 | 6.75M | for (int i = 0; i < dim; i += 8) |
736 | 3.70M | { |
737 | 9.04M | for (int j = 0; j < dim; j+= 8) |
738 | 5.34M | { |
739 | | /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */ |
740 | 5.34M | int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - |
741 | 5.34M | (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2); |
742 | 5.34M | int reconEnergy = sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - |
743 | 5.34M | (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2); |
744 | | |
745 | 5.34M | totEnergy += abs(sourceEnergy - reconEnergy); |
746 | 5.34M | } |
747 | 3.70M | } |
748 | 3.04M | return totEnergy; |
749 | 3.04M | } |
750 | 10.7M | else |
751 | 10.7M | { |
752 | | /* 4x4 is too small for sa8d */ |
753 | 10.7M | int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2); |
754 | 10.7M | int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2); |
755 | 10.7M | return abs(sourceEnergy - reconEnergy); |
756 | 10.7M | } |
757 | 13.8M | } pixel.cpp:int (anonymous namespace)::psyCost_pp<0>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 728 | 10.7M | { | 729 | 10.7M | static pixel zeroBuf[8] /* = { 0 } */; | 730 | | | 731 | 10.7M | if (size) | 732 | 0 | { | 733 | 0 | int dim = 1 << (size + 2); | 734 | 0 | uint32_t totEnergy = 0; | 735 | 0 | for (int i = 0; i < dim; i += 8) | 736 | 0 | { | 737 | 0 | for (int j = 0; j < dim; j+= 8) | 738 | 0 | { | 739 | | /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */ | 740 | 0 | int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - | 741 | 0 | (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2); | 742 | 0 | int reconEnergy = sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - | 743 | 0 | (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2); | 744 | |
| 745 | 0 | totEnergy += abs(sourceEnergy - reconEnergy); | 746 | 0 | } | 747 | 0 | } | 748 | 0 | return totEnergy; | 749 | 0 | } | 750 | 10.7M | else | 751 | 10.7M | { | 752 | | /* 4x4 is too small for sa8d */ | 753 | 10.7M | int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2); | 754 | 10.7M | int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2); | 755 | 10.7M | return abs(sourceEnergy - reconEnergy); | 756 | 10.7M | } | 757 | 10.7M | } |
pixel.cpp:int (anonymous namespace)::psyCost_pp<1>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 728 | 2.50M | { | 729 | 2.50M | static pixel zeroBuf[8] /* = { 0 } */; | 730 | | | 731 | 2.50M | if (size) | 732 | 2.50M | { | 733 | 2.50M | int dim = 1 << (size + 2); | 734 | 2.50M | uint32_t totEnergy = 0; | 735 | 5.00M | for (int i = 0; i < dim; i += 8) | 736 | 2.50M | { | 737 | 5.00M | for (int j = 0; j < dim; j+= 8) | 738 | 2.50M | { | 739 | | /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */ | 740 | 2.50M | int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - | 741 | 2.50M | (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2); | 742 | 2.50M | int reconEnergy = sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - | 743 | 2.50M | (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2); | 744 | | | 745 | 2.50M | totEnergy += abs(sourceEnergy - reconEnergy); | 746 | 2.50M | } | 747 | 2.50M | } | 748 | 2.50M | return totEnergy; | 749 | 2.50M | } | 750 | 0 | else | 751 | 0 | { | 752 | | /* 4x4 is too small for sa8d */ | 753 | 0 | int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2); | 754 | 0 | int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2); | 755 | 0 | return abs(sourceEnergy - reconEnergy); | 756 | 0 | } | 757 | 2.50M | } |
pixel.cpp:int (anonymous namespace)::psyCost_pp<2>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 728 | 491k | { | 729 | 491k | static pixel zeroBuf[8] /* = { 0 } */; | 730 | | | 731 | 491k | if (size) | 732 | 491k | { | 733 | 491k | int dim = 1 << (size + 2); | 734 | 491k | uint32_t totEnergy = 0; | 735 | 1.47M | for (int i = 0; i < dim; i += 8) | 736 | 982k | { | 737 | 2.94M | for (int j = 0; j < dim; j+= 8) | 738 | 1.96M | { | 739 | | /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */ | 740 | 1.96M | int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - | 741 | 1.96M | (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2); | 742 | 1.96M | int reconEnergy = sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - | 743 | 1.96M | (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2); | 744 | | | 745 | 1.96M | totEnergy += abs(sourceEnergy - reconEnergy); | 746 | 1.96M | } | 747 | 982k | } | 748 | 491k | return totEnergy; | 749 | 491k | } | 750 | 0 | else | 751 | 0 | { | 752 | | /* 4x4 is too small for sa8d */ | 753 | 0 | int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2); | 754 | 0 | int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2); | 755 | 0 | return abs(sourceEnergy - reconEnergy); | 756 | 0 | } | 757 | 491k | } |
pixel.cpp:int (anonymous namespace)::psyCost_pp<3>(unsigned char const*, long, unsigned char const*, long) Line | Count | Source | 728 | 54.7k | { | 729 | 54.7k | static pixel zeroBuf[8] /* = { 0 } */; | 730 | | | 731 | 54.7k | if (size) | 732 | 54.7k | { | 733 | 54.7k | int dim = 1 << (size + 2); | 734 | 54.7k | uint32_t totEnergy = 0; | 735 | 273k | for (int i = 0; i < dim; i += 8) | 736 | 219k | { | 737 | 1.09M | for (int j = 0; j < dim; j+= 8) | 738 | 876k | { | 739 | | /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */ | 740 | 876k | int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - | 741 | 876k | (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2); | 742 | 876k | int reconEnergy = sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - | 743 | 876k | (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2); | 744 | | | 745 | 876k | totEnergy += abs(sourceEnergy - reconEnergy); | 746 | 876k | } | 747 | 219k | } | 748 | 54.7k | return totEnergy; | 749 | 54.7k | } | 750 | 0 | else | 751 | 0 | { | 752 | | /* 4x4 is too small for sa8d */ | 753 | 0 | int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2); | 754 | 0 | int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2); | 755 | 0 | return abs(sourceEnergy - reconEnergy); | 756 | 0 | } | 757 | 54.7k | } |
Unexecuted instantiation: pixel.cpp:int (anonymous namespace)::psyCost_pp<4>(unsigned char const*, long, unsigned char const*, long) |
758 | | |
759 | | template<int bx, int by> |
760 | | void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb) |
761 | 35.2M | { |
762 | 220M | for (int y = 0; y < by; y++) |
763 | 185M | { |
764 | 1.54G | for (int x = 0; x < bx; x++) |
765 | 1.35G | a[x] = b[x]; |
766 | | |
767 | 185M | a += stridea; |
768 | 185M | b += strideb; |
769 | 185M | } |
770 | 35.2M | } pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 4>(unsigned char*, long, unsigned char const*, long) Line | Count | Source | 761 | 27.7M | { | 762 | 138M | for (int y = 0; y < by; y++) | 763 | 111M | { | 764 | 555M | for (int x = 0; x < bx; x++) | 765 | 444M | a[x] = b[x]; | 766 | | | 767 | 111M | a += stridea; | 768 | 111M | b += strideb; | 769 | 111M | } | 770 | 27.7M | } |
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 8>(unsigned char*, long, unsigned char const*, long) Line | Count | Source | 761 | 5.94M | { | 762 | 53.4M | for (int y = 0; y < by; y++) | 763 | 47.4M | { | 764 | 426M | for (int x = 0; x < bx; x++) | 765 | 379M | a[x] = b[x]; | 766 | | | 767 | 47.4M | a += stridea; | 768 | 47.4M | b += strideb; | 769 | 47.4M | } | 770 | 5.94M | } |
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 16>(unsigned char*, long, unsigned char const*, long) Line | Count | Source | 761 | 1.32M | { | 762 | 22.4M | for (int y = 0; y < by; y++) | 763 | 21.0M | { | 764 | 358M | for (int x = 0; x < bx; x++) | 765 | 336M | a[x] = b[x]; | 766 | | | 767 | 21.0M | a += stridea; | 768 | 21.0M | b += strideb; | 769 | 21.0M | } | 770 | 1.32M | } |
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 32>(unsigned char*, long, unsigned char const*, long) Line | Count | Source | 761 | 166k | { | 762 | 5.46M | for (int y = 0; y < by; y++) | 763 | 5.29M | { | 764 | 174M | for (int x = 0; x < bx; x++) | 765 | 169M | a[x] = b[x]; | 766 | | | 767 | 5.29M | a += stridea; | 768 | 5.29M | b += strideb; | 769 | 5.29M | } | 770 | 166k | } |
pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 64>(unsigned char*, long, unsigned char const*, long) Line | Count | Source | 761 | 6.19k | { | 762 | 402k | for (int y = 0; y < by; y++) | 763 | 396k | { | 764 | 25.7M | for (int x = 0; x < bx; x++) | 765 | 25.3M | a[x] = b[x]; | 766 | | | 767 | 396k | a += stridea; | 768 | 396k | b += strideb; | 769 | 396k | } | 770 | 6.19k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 8>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 4>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 8>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 12>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 4>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 24>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 8>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 
32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 64>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 48>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<48, 64>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<64, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 64>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 2>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 4>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 2>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 6>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 8>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 2>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<2, 8>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 12>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<6, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void 
(anonymous namespace)::blockcopy_pp_c<2, 16>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<16, 24>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<12, 32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<4, 32>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<32, 48>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<24, 64>(unsigned char*, long, unsigned char const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_pp_c<8, 64>(unsigned char*, long, unsigned char const*, long) |
771 | | |
/* Copy a bx-wide, by-high block of 16-bit samples from b (row pitch strideb)
 * to a (row pitch stridea).  Samples outside the bx columns are not touched. */
template<int bx, int by>
void blockcopy_ss_c(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
{
    for (int row = 0; row < by; row++, a += stridea, b += strideb)
    {
        for (int col = 0; col < bx; col++)
            a[col] = b[col];
    }
}
784 | | |
785 | | template<int bx, int by> |
786 | | void blockcopy_sp_c(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb) |
787 | 0 | { |
788 | 0 | for (int y = 0; y < by; y++) |
789 | 0 | { |
790 | 0 | for (int x = 0; x < bx; x++) |
791 | 0 | { |
792 | 0 | X265_CHECK((b[x] >= 0) && (b[x] <= ((1 << X265_DEPTH) - 1)), "blockcopy pixel size fail\n"); |
793 | 0 | a[x] = (pixel)b[x]; |
794 | 0 | } |
795 | |
|
796 | 0 | a += stridea; |
797 | 0 | b += strideb; |
798 | 0 | } |
799 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 4>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 8>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 16>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 32>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<64, 64>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 2>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<2, 4>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<4, 8>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<8, 16>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<16, 32>(unsigned char*, long, short const*, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::blockcopy_sp_c<32, 64>(unsigned char*, long, short const*, long) |
800 | | |
/* Copy a bx-wide, by-high block of pixels from b (row pitch strideb) into
 * the 16-bit block a (row pitch stridea), widening each sample to int16_t. */
template<int bx, int by>
void blockcopy_ps_c(int16_t* a, intptr_t stridea, const pixel* b, intptr_t strideb)
{
    for (int row = 0; row < by; row++, a += stridea, b += strideb)
    {
        for (int col = 0; col < bx; col++)
            a[col] = (int16_t)b[col];
    }
}
813 | | |
814 | | template<int bx, int by> |
815 | | void pixel_sub_ps_c(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1) |
816 | 25.2k | { |
817 | 619k | for (int y = 0; y < by; y++) |
818 | 593k | { |
819 | 23.0M | for (int x = 0; x < bx; x++) |
820 | 22.4M | a[x] = (int16_t)(b0[x] - b1[x]); |
821 | | |
822 | 593k | b0 += sstride0; |
823 | 593k | b1 += sstride1; |
824 | 593k | a += dstride; |
825 | 593k | } |
826 | 25.2k | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 4>(short*, long, unsigned char const*, unsigned char const*, long, long) pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 8>(short*, long, unsigned char const*, unsigned char const*, long, long) Line | Count | Source | 816 | 9.66k | { | 817 | 86.9k | for (int y = 0; y < by; y++) | 818 | 77.2k | { | 819 | 695k | for (int x = 0; x < bx; x++) | 820 | 618k | a[x] = (int16_t)(b0[x] - b1[x]); | 821 | | | 822 | 77.2k | b0 += sstride0; | 823 | 77.2k | b1 += sstride1; | 824 | 77.2k | a += dstride; | 825 | 77.2k | } | 826 | 9.66k | } |
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 16>(short*, long, unsigned char const*, unsigned char const*, long, long) Line | Count | Source | 816 | 5.45k | { | 817 | 92.7k | for (int y = 0; y < by; y++) | 818 | 87.2k | { | 819 | 1.48M | for (int x = 0; x < bx; x++) | 820 | 1.39M | a[x] = (int16_t)(b0[x] - b1[x]); | 821 | | | 822 | 87.2k | b0 += sstride0; | 823 | 87.2k | b1 += sstride1; | 824 | 87.2k | a += dstride; | 825 | 87.2k | } | 826 | 5.45k | } |
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 32>(short*, long, unsigned char const*, unsigned char const*, long, long) Line | Count | Source | 816 | 6.86k | { | 817 | 226k | for (int y = 0; y < by; y++) | 818 | 219k | { | 819 | 7.24M | for (int x = 0; x < bx; x++) | 820 | 7.02M | a[x] = (int16_t)(b0[x] - b1[x]); | 821 | | | 822 | 219k | b0 += sstride0; | 823 | 219k | b1 += sstride1; | 824 | 219k | a += dstride; | 825 | 219k | } | 826 | 6.86k | } |
pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<64, 64>(short*, long, unsigned char const*, unsigned char const*, long, long) Line | Count | Source | 816 | 3.27k | { | 817 | 212k | for (int y = 0; y < by; y++) | 818 | 209k | { | 819 | 13.6M | for (int x = 0; x < bx; x++) | 820 | 13.4M | a[x] = (int16_t)(b0[x] - b1[x]); | 821 | | | 822 | 209k | b0 += sstride0; | 823 | 209k | b1 += sstride1; | 824 | 209k | a += dstride; | 825 | 209k | } | 826 | 3.27k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 2>(short*, long, unsigned char const*, unsigned char const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<2, 4>(short*, long, unsigned char const*, unsigned char const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<4, 8>(short*, long, unsigned char const*, unsigned char const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<8, 16>(short*, long, unsigned char const*, unsigned char const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<16, 32>(short*, long, unsigned char const*, unsigned char const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_sub_ps_c<32, 64>(short*, long, unsigned char const*, unsigned char const*, long, long) |
827 | | |
828 | | template<int bx, int by> |
829 | | void pixel_add_ps_c(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1) |
830 | 53.4k | { |
831 | 568k | for (int y = 0; y < by; y++) |
832 | 514k | { |
833 | 8.67M | for (int x = 0; x < bx; x++) |
834 | 8.15M | a[x] = x265_clip(b0[x] + b1[x]); |
835 | | |
836 | 514k | b0 += sstride0; |
837 | 514k | b1 += sstride1; |
838 | 514k | a += dstride; |
839 | 514k | } |
840 | 53.4k | } pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 4>(unsigned char*, long, unsigned char const*, short const*, long, long) Line | Count | Source | 830 | 24.1k | { | 831 | 120k | for (int y = 0; y < by; y++) | 832 | 96.6k | { | 833 | 483k | for (int x = 0; x < bx; x++) | 834 | 386k | a[x] = x265_clip(b0[x] + b1[x]); | 835 | | | 836 | 96.6k | b0 += sstride0; | 837 | 96.6k | b1 += sstride1; | 838 | 96.6k | a += dstride; | 839 | 96.6k | } | 840 | 24.1k | } |
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 8>(unsigned char*, long, unsigned char const*, short const*, long, long) Line | Count | Source | 830 | 13.9k | { | 831 | 125k | for (int y = 0; y < by; y++) | 832 | 111k | { | 833 | 1.00M | for (int x = 0; x < bx; x++) | 834 | 891k | a[x] = x265_clip(b0[x] + b1[x]); | 835 | | | 836 | 111k | b0 += sstride0; | 837 | 111k | b1 += sstride1; | 838 | 111k | a += dstride; | 839 | 111k | } | 840 | 13.9k | } |
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 16>(unsigned char*, long, unsigned char const*, short const*, long, long) Line | Count | Source | 830 | 11.4k | { | 831 | 195k | for (int y = 0; y < by; y++) | 832 | 183k | { | 833 | 3.12M | for (int x = 0; x < bx; x++) | 834 | 2.94M | a[x] = x265_clip(b0[x] + b1[x]); | 835 | | | 836 | 183k | b0 += sstride0; | 837 | 183k | b1 += sstride1; | 838 | 183k | a += dstride; | 839 | 183k | } | 840 | 11.4k | } |
pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 32>(unsigned char*, long, unsigned char const*, short const*, long, long) Line | Count | Source | 830 | 3.84k | { | 831 | 126k | for (int y = 0; y < by; y++) | 832 | 122k | { | 833 | 4.05M | for (int x = 0; x < bx; x++) | 834 | 3.93M | a[x] = x265_clip(b0[x] + b1[x]); | 835 | | | 836 | 122k | b0 += sstride0; | 837 | 122k | b1 += sstride1; | 838 | 122k | a += dstride; | 839 | 122k | } | 840 | 3.84k | } |
Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<64, 64>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 2>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<2, 4>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<4, 8>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<8, 16>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<16, 32>(unsigned char*, long, unsigned char const*, short const*, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::pixel_add_ps_c<32, 64>(unsigned char*, long, unsigned char const*, short const*, long, long) |
841 | | |
842 | | template<int bx, int by> |
843 | | void addAvg(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride) |
844 | 0 | { |
845 | 0 | int shiftNum, offset; |
846 | |
|
847 | 0 | shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; |
848 | 0 | offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; |
849 | |
|
850 | 0 | for (int y = 0; y < by; y++) |
851 | 0 | { |
852 | 0 | for (int x = 0; x < bx; x += 2) |
853 | 0 | { |
854 | 0 | dst[x + 0] = x265_clip((src0[x + 0] + src1[x + 0] + offset) >> shiftNum); |
855 | 0 | dst[x + 1] = x265_clip((src0[x + 1] + src1[x + 1] + offset) >> shiftNum); |
856 | 0 | } |
857 | |
|
858 | 0 | src0 += src0Stride; |
859 | 0 | src1 += src1Stride; |
860 | 0 | dst += dstStride; |
861 | 0 | } |
862 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 4>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 64>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 4>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 12>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 4>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: 
pixel.cpp:void (anonymous namespace)::addAvg<16, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 24>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 64>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 48>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<48, 64>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<64, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 64>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 2>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 4>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 2>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous 
namespace)::addAvg<8, 6>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 2>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 8>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 12>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<6, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<2, 16>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<16, 24>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<12, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<4, 32>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<32, 48>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<24, 64>(short const*, short const*, unsigned char*, long, long, long) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::addAvg<8, 64>(short const*, short const*, unsigned char*, long, long, long) |
863 | | |
864 | | static void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) |
865 | 0 | { |
866 | 0 | for (int r = 0; r < height; r++) |
867 | 0 | { |
868 | 0 | for (int c = 0; c < width; c++) |
869 | 0 | dst[c] = ((pixel)src[c]) << shift; |
870 | |
|
871 | 0 | dst += dstStride; |
872 | 0 | src += srcStride; |
873 | 0 | } |
874 | 0 | } |
875 | | |
876 | | static void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) |
877 | 0 | { |
878 | 0 | for (int r = 0; r < height; r++) |
879 | 0 | { |
880 | 0 | for (int c = 0; c < width; c++) |
881 | 0 | dst[c] = (pixel)((src[c] >> shift) & mask); |
882 | |
|
883 | 0 | dst += dstStride; |
884 | 0 | src += srcStride; |
885 | 0 | } |
886 | 0 | } |
887 | | |
888 | | static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) |
889 | 0 | { |
890 | 0 | for (int r = 0; r < height; r++) |
891 | 0 | { |
892 | 0 | for (int c = 0; c < width; c++) |
893 | 0 | dst[c] = (pixel)((src[c] >> shift)); |
894 | |
|
895 | 0 | dst += dstStride; |
896 | 0 | src += srcStride; |
897 | 0 | } |
898 | 0 | } |
899 | | |
900 | | static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) |
901 | 0 | { |
902 | 0 | for (int r = 0; r < height; r++) |
903 | 0 | { |
904 | 0 | for (int c = 0; c < width; c++) |
905 | 0 | dst[c] = (pixel)((src[c] << shift) & mask); |
906 | |
|
907 | 0 | dst += dstStride; |
908 | 0 | src += srcStride; |
909 | 0 | } |
910 | 0 | } |
911 | | |
912 | | /* Estimate the total amount of influence on future quality that could be had if we |
913 | | * were to improve the reference samples used to inter predict any given CU. */ |
914 | | static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, |
915 | | const int32_t* invQscales, const double* fpsFactor, int len) |
916 | 0 | { |
917 | 0 | double fps = *fpsFactor / 256; // range[0.01, 1.00] |
918 | 0 | for (int i = 0; i < len; i++) |
919 | 0 | { |
920 | 0 | int intraCost = intraCosts[i]; |
921 | 0 | int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK); |
922 | 0 | double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8 |
923 | 0 | double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0 |
924 | 0 | double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0 |
925 | |
|
926 | | #if 0 |
927 | | // algorithm that output match to asm |
928 | | float intraRcp = (float)1.0f / intraCost; // VC can't mapping this into RCPPS |
929 | | float intraRcpError1 = (float)intraCost * (float)intraRcp; |
930 | | intraRcpError1 *= (float)intraRcp; |
931 | | float intraRcpError2 = intraRcp + intraRcp; |
932 | | float propagateDenom = intraRcpError2 - intraRcpError1; |
933 | | dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5); |
934 | | #else |
935 | 0 | double propagateDenom = (double)intraCost; // Q32 |
936 | 0 | dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5); |
937 | 0 | #endif |
938 | 0 | } |
939 | | //} |
940 | 0 | } |
941 | | |
/* Pack doubles into signed Q8.8 fixed point for storage.
 *
 * dst[i] receives the 16-bit two's-complement pattern of src[i] * 256, truncated
 * toward zero. Products outside the int16_t range saturate to -32768 / 32767 and
 * NaN maps to -32768, because converting an out-of-range or NaN double to an
 * integer type is undefined behaviour. In-range inputs are unaffected.
 *
 * No byte swapping is done here: values are written in host byte order.
 * NOTE(review): the earlier comment mentioned big-endian storage -- confirm
 * whether a swap is expected elsewhere. */
static void cuTreeFix8Pack(uint16_t *dst, double *src, int count)
{
    for (int i = 0; i < count; i++)
    {
        double scaled = src[i] * 256.0;

        if (!(scaled > -32768.0))
            scaled = -32768.0;      // negated test also catches NaN
        else if (scaled > 32767.0)
            scaled = 32767.0;

        dst[i] = (uint16_t)(int16_t)scaled;
    }
}
948 | | |
/* Unpack Q8.8 fixed point back into doubles: each 16-bit word of src is
 * reinterpreted as a signed value and divided by 256. */
static void cuTreeFix8Unpack(double *dst, uint16_t *src, int count)
{
    const uint16_t* in = src;
    double* out = dst;

    for (int remaining = count; remaining > 0; remaining--, in++, out++)
    {
        const int16_t fixed = *in;
        *out = (double)fixed / 256.0;
    }
}
957 | | |
958 | | template<int log2TrSize> |
959 | | static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k) |
960 | 0 | { |
961 | 0 | *ssBlock = 0; |
962 | 0 | int trSize = 1 << log2TrSize; |
963 | 0 | for (int y = 0; y < trSize; y++) |
964 | 0 | { |
965 | 0 | for (int x = 0; x < trSize; x++) |
966 | 0 | { |
967 | 0 | int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // copy of residual coeff |
968 | 0 | *ssBlock += temp * temp; |
969 | 0 | } |
970 | 0 | } |
971 | |
|
972 | 0 | *ac_k = 0; |
973 | 0 | for (int block_yy = 0; block_yy < trSize; block_yy += 1) |
974 | 0 | { |
975 | 0 | for (int block_xx = 0; block_xx < trSize; block_xx += 1) |
976 | 0 | { |
977 | 0 | uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift; |
978 | 0 | *ac_k += temp * temp; |
979 | 0 | } |
980 | 0 | } |
981 | 0 | } Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<2>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<3>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<4>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<5>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*) Unexecuted instantiation: pixel.cpp:void (anonymous namespace)::ssimDist_c<6>(unsigned char const*, unsigned int, unsigned char const*, long, unsigned long*, int, unsigned long*) |
982 | | |
983 | | static void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k) |
984 | 0 | { |
985 | 0 | *z_k = 0; |
986 | 0 | for (uint32_t block_yy = 0; block_yy < blockSize; block_yy += 1) |
987 | 0 | { |
988 | 0 | for (uint32_t block_xx = 0; block_xx < blockSize; block_xx += 1) |
989 | 0 | { |
990 | 0 | uint32_t temp = src[block_yy * blockSize + block_xx] >> shift; |
991 | 0 | *z_k += temp * temp; |
992 | 0 | } |
993 | 0 | } |
994 | 0 | } |
995 | | |
996 | | #if HIGH_BIT_DEPTH |
997 | | static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, |
998 | | const pixel minPix, const pixel maxPix) |
999 | | { |
1000 | | pixel maxLumaLevel = 0; |
1001 | | uint64_t sumLuma = 0; |
1002 | | |
1003 | | for (int r = 0; r < height; r++) |
1004 | | { |
1005 | | for (int c = 0; c < width; c++) |
1006 | | { |
1007 | | /* Clip luma of source picture to max and min*/ |
1008 | | src[c] = x265_clip3((pixel)minPix, (pixel)maxPix, src[c]); |
1009 | | maxLumaLevel = X265_MAX(src[c], maxLumaLevel); |
1010 | | sumLuma += src[c]; |
1011 | | } |
1012 | | src += stride; |
1013 | | } |
1014 | | *outsum = sumLuma; |
1015 | | return maxLumaLevel; |
1016 | | } |
1017 | | |
1018 | | #endif |
1019 | | } // end anonymous namespace |
1020 | | |
1021 | | namespace X265_NS { |
1022 | | // x265 private namespace |
1023 | | |
1024 | | /* Extend the edges of a picture so that it may safely be used for motion |
1025 | | * compensation. This function assumes the picture is stored in a buffer with |
1026 | | * sufficient padding for the X and Y margins */ |
1027 | | void extendPicBorder(pixel* pic, intptr_t stride, int width, int height, int marginX, int marginY) |
1028 | 2.79k | { |
1029 | | /* extend left and right margins */ |
1030 | 2.79k | primitives.extendRowBorder(pic, stride, width, height, marginX); |
1031 | | |
1032 | | /* copy top row to create above margin */ |
1033 | 2.79k | pixel* top = pic - marginX; |
1034 | 189k | for (int y = 0; y < marginY; y++) |
1035 | 186k | memcpy(top - (y + 1) * stride, top, stride * sizeof(pixel)); |
1036 | | |
1037 | | /* copy bottom row to create below margin */ |
1038 | 2.79k | pixel* bot = pic - marginX + (height - 1) * stride; |
1039 | 189k | for (int y = 0; y < marginY; y++) |
1040 | 186k | memcpy(bot + (y + 1) * stride, bot, stride * sizeof(pixel)); |
1041 | 2.79k | } |
1042 | | |
1043 | | /* Initialize entries for pixel functions defined in this file.
 * Fills the EncoderPrimitives table `p` with the C implementations from this
 * translation unit. Wherever a primitive has NONALIGNED and ALIGNED slots, both
 * are bound to the same C function. The helper macros below are defined inside
 * the function body and are not undefined afterwards. */ |
1044 | | void setupPixelPrimitives_c(EncoderPrimitives &p) |
1045 | 1 | { |
1046 | 1 | #define LUMA_PU(W, H) \ |
1047 | 25 | p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \ |
1048 | 25 | p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg<W, H>; \ |
1049 | 25 | p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg<W, H>; \ |
1050 | 25 | p.pu[LUMA_ ## W ## x ## H].sad = sad<W, H>; \ |
1051 | 25 | p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3<W, H>; \ |
1052 | 25 | p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4<W, H>; \ |
1053 | 25 | p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp<W, H>; \ |
1054 | 25 | p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[ALIGNED] = pixelavg_pp<W, H>; |
1055 | 1 | #define LUMA_CU(W, H) \ |
1056 | 5 | p.cu[BLOCK_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \ |
1057 | 5 | p.cu[BLOCK_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \ |
1058 | 5 | p.cu[BLOCK_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>; \ |
1059 | 5 | p.cu[BLOCK_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \ |
1060 | 5 | p.cu[BLOCK_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \ |
1061 | 5 | p.cu[BLOCK_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \ |
1062 | 5 | p.cu[BLOCK_ ## W ## x ## H].blockfill_s[NONALIGNED] = blockfill_s_c<W>; \ |
1063 | 5 | p.cu[BLOCK_ ## W ## x ## H].blockfill_s[ALIGNED] = blockfill_s_c<W>; \ |
1064 | 5 | p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shl = cpy2Dto1D_shl<W>; \ |
1065 | 5 | p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shr = cpy2Dto1D_shr<W>; \ |
1066 | 5 | p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[NONALIGNED] = cpy1Dto2D_shl<W>; \ |
1067 | 5 | p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[ALIGNED] = cpy1Dto2D_shl<W>; \ |
1068 | 5 | p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shr = cpy1Dto2D_shr<W>; \ |
1069 | 5 | p.cu[BLOCK_ ## W ## x ## H].psy_cost_pp = psyCost_pp<BLOCK_ ## W ## x ## H>; \ |
1070 | 5 | p.cu[BLOCK_ ## W ## x ## H].transpose = transpose<W>; \ |
1071 | 5 | p.cu[BLOCK_ ## W ## x ## H].ssd_s[NONALIGNED] = pixel_ssd_s_c<W>; \ |
1072 | 5 | p.cu[BLOCK_ ## W ## x ## H].ssd_s[ALIGNED] = pixel_ssd_s_c<W>; \ |
1073 | 5 | p.cu[BLOCK_ ## W ## x ## H].var = pixel_var<W>; \ |
1074 | 5 | p.cu[BLOCK_ ## W ## x ## H].calcresidual[NONALIGNED] = getResidual<W>; \ |
1075 | 5 | p.cu[BLOCK_ ## W ## x ## H].calcresidual[ALIGNED] = getResidual<W>; \ |
1076 | 5 | p.cu[BLOCK_ ## W ## x ## H].sse_pp = sse<W, H, pixel, pixel>; \ |
1077 | 5 | p.cu[BLOCK_ ## W ## x ## H].sse_ss = sse<W, H, int16_t, int16_t>; |
1078 | | // Install the LUMA_PU set for each of the 25 luma partition sizes
1079 | 1 | LUMA_PU(4, 4); |
1080 | 1 | LUMA_PU(8, 8); |
1081 | 1 | LUMA_PU(16, 16); |
1082 | 1 | LUMA_PU(32, 32); |
1083 | 1 | LUMA_PU(64, 64); |
1084 | 1 | LUMA_PU(4, 8); |
1085 | 1 | LUMA_PU(8, 4); |
1086 | 1 | LUMA_PU(16, 8); |
1087 | 1 | LUMA_PU(8, 16); |
1088 | 1 | LUMA_PU(16, 12); |
1089 | 1 | LUMA_PU(12, 16); |
1090 | 1 | LUMA_PU(16, 4); |
1091 | 1 | LUMA_PU(4, 16); |
1092 | 1 | LUMA_PU(32, 16); |
1093 | 1 | LUMA_PU(16, 32); |
1094 | 1 | LUMA_PU(32, 24); |
1095 | 1 | LUMA_PU(24, 32); |
1096 | 1 | LUMA_PU(32, 8); |
1097 | 1 | LUMA_PU(8, 32); |
1098 | 1 | LUMA_PU(64, 32); |
1099 | 1 | LUMA_PU(32, 64); |
1100 | 1 | LUMA_PU(64, 48); |
1101 | 1 | LUMA_PU(48, 64); |
1102 | 1 | LUMA_PU(64, 16); |
1103 | 1 | LUMA_PU(16, 64); |
1104 | | // ads per luma PU: each size is bound to one of the ads_x1 / ads_x2 / ads_x4 instantiations
1105 | 1 | p.pu[LUMA_4x4].ads = ads_x1<4, 4>; |
1106 | 1 | p.pu[LUMA_8x8].ads = ads_x1<8, 8>; |
1107 | 1 | p.pu[LUMA_8x4].ads = ads_x2<8, 4>; |
1108 | 1 | p.pu[LUMA_4x8].ads = ads_x2<4, 8>; |
1109 | 1 | p.pu[LUMA_16x16].ads = ads_x4<16, 16>; |
1110 | 1 | p.pu[LUMA_16x8].ads = ads_x2<16, 8>; |
1111 | 1 | p.pu[LUMA_8x16].ads = ads_x2<8, 16>; |
1112 | 1 | p.pu[LUMA_16x12].ads = ads_x1<16, 12>; |
1113 | 1 | p.pu[LUMA_12x16].ads = ads_x1<12, 16>; |
1114 | 1 | p.pu[LUMA_16x4].ads = ads_x1<16, 4>; |
1115 | 1 | p.pu[LUMA_4x16].ads = ads_x1<4, 16>; |
1116 | 1 | p.pu[LUMA_32x32].ads = ads_x4<32, 32>; |
1117 | 1 | p.pu[LUMA_32x16].ads = ads_x2<32, 16>; |
1118 | 1 | p.pu[LUMA_16x32].ads = ads_x2<16, 32>; |
1119 | 1 | p.pu[LUMA_32x24].ads = ads_x4<32, 24>; |
1120 | 1 | p.pu[LUMA_24x32].ads = ads_x4<24, 32>; |
1121 | 1 | p.pu[LUMA_32x8].ads = ads_x4<32, 8>; |
1122 | 1 | p.pu[LUMA_8x32].ads = ads_x4<8, 32>; |
1123 | 1 | p.pu[LUMA_64x64].ads = ads_x4<64, 64>; |
1124 | 1 | p.pu[LUMA_64x32].ads = ads_x2<64, 32>; |
1125 | 1 | p.pu[LUMA_32x64].ads = ads_x2<32, 64>; |
1126 | 1 | p.pu[LUMA_64x48].ads = ads_x4<64, 48>; |
1127 | 1 | p.pu[LUMA_48x64].ads = ads_x4<48, 64>; |
1128 | 1 | p.pu[LUMA_64x16].ads = ads_x4<64, 16>; |
1129 | 1 | p.pu[LUMA_16x64].ads = ads_x4<16, 64>; |
1130 | | // satd per luma PU: 4x4 and 8x4 use dedicated functions, the other sizes use satd4<> or satd8<>
1131 | 1 | p.pu[LUMA_4x4].satd = satd_4x4; |
1132 | 1 | p.pu[LUMA_8x8].satd = satd8<8, 8>; |
1133 | 1 | p.pu[LUMA_8x4].satd = satd_8x4; |
1134 | 1 | p.pu[LUMA_4x8].satd = satd4<4, 8>; |
1135 | 1 | p.pu[LUMA_16x16].satd = satd8<16, 16>; |
1136 | 1 | p.pu[LUMA_16x8].satd = satd8<16, 8>; |
1137 | 1 | p.pu[LUMA_8x16].satd = satd8<8, 16>; |
1138 | 1 | p.pu[LUMA_16x12].satd = satd8<16, 12>; |
1139 | 1 | p.pu[LUMA_12x16].satd = satd4<12, 16>; |
1140 | 1 | p.pu[LUMA_16x4].satd = satd8<16, 4>; |
1141 | 1 | p.pu[LUMA_4x16].satd = satd4<4, 16>; |
1142 | 1 | p.pu[LUMA_32x32].satd = satd8<32, 32>; |
1143 | 1 | p.pu[LUMA_32x16].satd = satd8<32, 16>; |
1144 | 1 | p.pu[LUMA_16x32].satd = satd8<16, 32>; |
1145 | 1 | p.pu[LUMA_32x24].satd = satd8<32, 24>; |
1146 | 1 | p.pu[LUMA_24x32].satd = satd8<24, 32>; |
1147 | 1 | p.pu[LUMA_32x8].satd = satd8<32, 8>; |
1148 | 1 | p.pu[LUMA_8x32].satd = satd8<8, 32>; |
1149 | 1 | p.pu[LUMA_64x64].satd = satd8<64, 64>; |
1150 | 1 | p.pu[LUMA_64x32].satd = satd8<64, 32>; |
1151 | 1 | p.pu[LUMA_32x64].satd = satd8<32, 64>; |
1152 | 1 | p.pu[LUMA_64x48].satd = satd8<64, 48>; |
1153 | 1 | p.pu[LUMA_48x64].satd = satd8<48, 64>; |
1154 | 1 | p.pu[LUMA_64x16].satd = satd8<64, 16>; |
1155 | 1 | p.pu[LUMA_16x64].satd = satd8<16, 64>; |
1156 | | // Install the LUMA_CU set for the five square block sizes
1157 | 1 | LUMA_CU(4, 4); |
1158 | 1 | LUMA_CU(8, 8); |
1159 | 1 | LUMA_CU(16, 16); |
1160 | 1 | LUMA_CU(32, 32); |
1161 | 1 | LUMA_CU(64, 64); |
1162 | | // sa8d per luma CU; the 4x4 entry reuses satd_4x4
1163 | 1 | p.cu[BLOCK_4x4].sa8d = satd_4x4; |
1164 | 1 | p.cu[BLOCK_8x8].sa8d = sa8d_8x8; |
1165 | 1 | p.cu[BLOCK_16x16].sa8d = sa8d_16x16; |
1166 | 1 | p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>; |
1167 | 1 | p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>; |
1168 | | // 4:2:0 chroma PU primitives (addAvg and copy_pp only)
1169 | 1 | #define CHROMA_PU_420(W, H) \ |
1170 | 25 | p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg<W, H>; \ |
1171 | 25 | p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[ALIGNED] = addAvg<W, H>; \ |
1172 | 25 | p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \ |
1173 | 1 | 
1174 | 1 | CHROMA_PU_420(2, 2); |
1175 | 1 | CHROMA_PU_420(2, 4); |
1176 | 1 | CHROMA_PU_420(4, 4); |
1177 | 1 | CHROMA_PU_420(8, 8); |
1178 | 1 | CHROMA_PU_420(16, 16); |
1179 | 1 | CHROMA_PU_420(32, 32); |
1180 | 1 | CHROMA_PU_420(4, 2); |
1181 | 1 | CHROMA_PU_420(8, 4); |
1182 | 1 | CHROMA_PU_420(4, 8); |
1183 | 1 | CHROMA_PU_420(8, 6); |
1184 | 1 | CHROMA_PU_420(6, 8); |
1185 | 1 | CHROMA_PU_420(8, 2); |
1186 | 1 | CHROMA_PU_420(2, 8); |
1187 | 1 | CHROMA_PU_420(16, 8); |
1188 | 1 | CHROMA_PU_420(8, 16); |
1189 | 1 | CHROMA_PU_420(16, 12); |
1190 | 1 | CHROMA_PU_420(12, 16); |
1191 | 1 | CHROMA_PU_420(16, 4); |
1192 | 1 | CHROMA_PU_420(4, 16); |
1193 | 1 | CHROMA_PU_420(32, 16); |
1194 | 1 | CHROMA_PU_420(16, 32); |
1195 | 1 | CHROMA_PU_420(32, 24); |
1196 | 1 | CHROMA_PU_420(24, 32); |
1197 | 1 | CHROMA_PU_420(32, 8); |
1198 | 1 | CHROMA_PU_420(8, 32); |
1199 | | // 4:2:0 chroma satd; sizes set to NULL below get no C satd function
1200 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_2x2].satd = NULL; |
1201 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd = satd_4x4; |
1202 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = satd8<8, 8>; |
1203 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = satd8<16, 16>; |
1204 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = satd8<32, 32>; |
1205 | | 
1206 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].satd = NULL; |
1207 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].satd = NULL; |
1208 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = satd_8x4; |
1209 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = satd4<4, 8>; |
1210 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd = satd8<16, 8>; |
1211 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd = satd8<8, 16>; |
1212 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = satd8<32, 16>; |
1213 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = satd8<16, 32>; |
1214 | | 
1215 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].satd = NULL; |
1216 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].satd = NULL; |
1217 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].satd = NULL; |
1218 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].satd = NULL; |
1219 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = satd4<16, 12>; |
1220 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = satd4<12, 16>; |
1221 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd = satd4<16, 4>; |
1222 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = satd4<4, 16>; |
1223 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = satd8<32, 24>; |
1224 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = satd8<24, 32>; |
1225 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd = satd8<32, 8>; |
1226 | 1 | p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd = satd8<8, 32>; |
1227 | | // 4:2:0 chroma CU primitives
1228 | 1 | #define CHROMA_CU_420(W, H) \ |
1229 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sse_pp = sse<W, H, pixel, pixel>; \ |
1230 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \ |
1231 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \ |
1232 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \ |
1233 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \ |
1234 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \ |
1235 | 5 | p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>; |
1236 | | 
1237 | 1 | CHROMA_CU_420(2, 2) |
1238 | 1 | CHROMA_CU_420(4, 4) |
1239 | 1 | CHROMA_CU_420(8, 8) |
1240 | 1 | CHROMA_CU_420(16, 16) |
1241 | 1 | CHROMA_CU_420(32, 32) |
1242 | | // 4:2:0 chroma sa8d: slots are indexed by BLOCK_WxH but filled with half-size (W/2 x H/2) functions
1243 | 1 | p.chroma[X265_CSP_I420].cu[BLOCK_8x8].sa8d = p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd; |
1244 | 1 | p.chroma[X265_CSP_I420].cu[BLOCK_16x16].sa8d = sa8d8<8, 8>; |
1245 | 1 | p.chroma[X265_CSP_I420].cu[BLOCK_32x32].sa8d = sa8d16<16, 16>; |
1246 | 1 | p.chroma[X265_CSP_I420].cu[BLOCK_64x64].sa8d = sa8d16<32, 32>; |
1247 | | // 4:2:2 chroma PU primitives (addAvg and copy_pp only)
1248 | 1 | #define CHROMA_PU_422(W, H) \ |
1249 | 25 | p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg<W, H>; \ |
1250 | 25 | p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[ALIGNED] = addAvg<W, H>; \ |
1251 | 25 | p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \ |
1252 | 1 | 
1253 | 1 | CHROMA_PU_422(2, 4); |
1254 | 1 | CHROMA_PU_422(4, 8); |
1255 | 1 | CHROMA_PU_422(8, 16); |
1256 | 1 | CHROMA_PU_422(16, 32); |
1257 | 1 | CHROMA_PU_422(32, 64); |
1258 | 1 | CHROMA_PU_422(4, 4); |
1259 | 1 | CHROMA_PU_422(2, 8); |
1260 | 1 | CHROMA_PU_422(8, 8); |
1261 | 1 | CHROMA_PU_422(4, 16); |
1262 | 1 | CHROMA_PU_422(8, 12); |
1263 | 1 | CHROMA_PU_422(6, 16); |
1264 | 1 | CHROMA_PU_422(8, 4); |
1265 | 1 | CHROMA_PU_422(2, 16); |
1266 | 1 | CHROMA_PU_422(16, 16); |
1267 | 1 | CHROMA_PU_422(8, 32); |
1268 | 1 | CHROMA_PU_422(16, 24); |
1269 | 1 | CHROMA_PU_422(12, 32); |
1270 | 1 | CHROMA_PU_422(16, 8); |
1271 | 1 | CHROMA_PU_422(4, 32); |
1272 | 1 | CHROMA_PU_422(32, 32); |
1273 | 1 | CHROMA_PU_422(16, 64); |
1274 | 1 | CHROMA_PU_422(32, 48); |
1275 | 1 | CHROMA_PU_422(24, 64); |
1276 | 1 | CHROMA_PU_422(32, 16); |
1277 | 1 | CHROMA_PU_422(8, 64); |
1278 | | // 4:2:2 chroma satd; sizes set to NULL below get no C satd function
1279 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_2x4].satd = NULL; |
1280 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd = satd4<4, 8>; |
1281 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].satd = satd8<8, 16>; |
1282 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].satd = satd8<16, 32>; |
1283 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = satd8<32, 64>; |
1284 | | 
1285 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd = satd_4x4; |
1286 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].satd = NULL; |
1287 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd = satd8<8, 8>; |
1288 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd = satd4<4, 16>; |
1289 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].satd = satd8<16, 16>; |
1290 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].satd = satd8<8, 32>; |
1291 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = satd8<32, 32>; |
1292 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].satd = satd8<16, 64>; |
1293 | | 
1294 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd = satd4<8, 12>; |
1295 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].satd = NULL; |
1296 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd = satd4<8, 4>; |
1297 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].satd = NULL; |
1298 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].satd = satd8<16, 24>; |
1299 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = satd4<12, 32>; |
1300 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].satd = satd8<16, 8>; |
1301 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = satd4<4, 32>; |
1302 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].satd = satd8<32, 48>; |
1303 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = satd8<24, 64>; |
1304 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = satd8<32, 16>; |
1305 | 1 | p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd = satd8<8, 64>; |
1306 | | // 4:2:2 chroma CU primitives
1307 | 1 | #define CHROMA_CU_422(W, H) \ |
1308 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sse_pp = sse<W, H, pixel, pixel>; \ |
1309 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \ |
1310 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \ |
1311 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>; \ |
1312 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \ |
1313 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_c<W, H>; \ |
1314 | 5 | p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_c<W, H>; |
1315 | | 
1316 | 1 | CHROMA_CU_422(2, 4) |
1317 | 1 | CHROMA_CU_422(4, 8) |
1318 | 1 | CHROMA_CU_422(8, 16) |
1319 | 1 | CHROMA_CU_422(16, 32) |
1320 | 1 | CHROMA_CU_422(32, 64) |
1321 | | // 4:2:2 chroma sa8d: BLOCK_WxH slots are filled with (W/2 x H) functions
1322 | 1 | p.chroma[X265_CSP_I422].cu[BLOCK_8x8].sa8d = p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd; |
1323 | 1 | p.chroma[X265_CSP_I422].cu[BLOCK_16x16].sa8d = sa8d8<8, 16>; |
1324 | 1 | p.chroma[X265_CSP_I422].cu[BLOCK_32x32].sa8d = sa8d16<16, 32>; |
1325 | 1 | p.chroma[X265_CSP_I422].cu[BLOCK_64x64].sa8d = sa8d16<32, 64>; |
1326 | | // Primitives below are single entries of `p`, not indexed by block size
1327 | 1 | p.weight_pp = weight_pp_c; |
1328 | 1 | p.weight_sp = weight_sp_c; |
1329 | | // frameInitLowres and frameInitLowerRes are both bound to frame_init_lowres_core
1330 | 1 | p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64; |
1331 | 1 | p.scale2D_64to32 = scale2D_64to32; |
1332 | 1 | p.frameInitLowres = frame_init_lowres_core; |
1333 | 1 | p.frameInitLowerRes = frame_init_lowres_core; |
1334 | 1 | p.ssim_4x4x2_core = ssim_4x4x2_core; |
1335 | 1 | p.ssim_end_4 = ssim_end_4; |
1336 | | // Plane copy variants; planeClipAndMax is only installed in HIGH_BIT_DEPTH builds
1337 | 1 | p.planecopy_cp = planecopy_cp_c; |
1338 | 1 | p.planecopy_sp = planecopy_sp_c; |
1339 | 1 | p.planecopy_sp_shl = planecopy_sp_shl_c; |
1340 | 1 | p.planecopy_pp_shr = planecopy_pp_shr_c; |
1341 | | #if HIGH_BIT_DEPTH |
1342 | | p.planeClipAndMax = planeClipAndMax_c; |
1343 | | #endif |
1344 | 1 | p.propagateCost = estimateCUPropagateCost; |
1345 | 1 | p.fix8Unpack = cuTreeFix8Unpack; |
1346 | 1 | p.fix8Pack = cuTreeFix8Pack; |
1347 | | // ssimDist is instantiated with log2 of the block width (4x4 -> 2 ... 64x64 -> 6)
1348 | 1 | p.cu[BLOCK_4x4].ssimDist = ssimDist_c<2>; |
1349 | 1 | p.cu[BLOCK_8x8].ssimDist = ssimDist_c<3>; |
1350 | 1 | p.cu[BLOCK_16x16].ssimDist = ssimDist_c<4>; |
1351 | 1 | p.cu[BLOCK_32x32].ssimDist = ssimDist_c<5>; |
1352 | 1 | p.cu[BLOCK_64x64].ssimDist = ssimDist_c<6>; |
1353 | | // normFact: one shared function for 8x8 through 64x64; no 4x4 entry is assigned here
1354 | 1 | p.cu[BLOCK_8x8].normFact = normFact_c; |
1355 | 1 | p.cu[BLOCK_16x16].normFact = normFact_c; |
1356 | 1 | p.cu[BLOCK_32x32].normFact = normFact_c; |
1357 | 1 | p.cu[BLOCK_64x64].normFact = normFact_c; |
1358 | 1 | } |
1359 | | } |