Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/common/primitives.cpp
Line | Count | Source
  1 |   | /*****************************************************************************
  2 |   |  * Copyright (C) 2013-2020 MulticoreWare, Inc
  3 |   |  *
  4 |   |  * Authors: Steve Borho <steve@borho.org>
  5 |   |  *
  6 |   |  * This program is free software; you can redistribute it and/or modify
  7 |   |  * it under the terms of the GNU General Public License as published by
  8 |   |  * the Free Software Foundation; either version 2 of the License, or
  9 |   |  * (at your option) any later version.
 10 |   |  *
 11 |   |  * This program is distributed in the hope that it will be useful,
 12 |   |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |   |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 |   |  * GNU General Public License for more details.
 15 |   |  *
 16 |   |  * You should have received a copy of the GNU General Public License
 17 |   |  * along with this program; if not, write to the Free Software
 18 |   |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 19 |   |  *
 20 |   |  * This program is also available under a commercial proprietary license.
 21 |   |  * For more information, contact us at license @ x265.com.
 22 |   |  *****************************************************************************/
 23 |   |
 24 |   | #include "common.h"
 25 |   | #include "primitives.h"
 26 |   |
 27 |   | namespace X265_NS {
 28 |   | // x265 private namespace
 29 |   |
 30 |   | extern const uint8_t lumaPartitionMapTable[] =
 31 |   | {
 32 |   | //  4          8          12          16          20  24          28  32          36  40  44  48          52  56  60  64
 33 |   |     LUMA_4x4,  LUMA_4x8,  255,        LUMA_4x16,  255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 4
 34 |   |     LUMA_8x4,  LUMA_8x8,  255,        LUMA_8x16,  255, 255,        255, LUMA_8x32,  255, 255, 255, 255,        255, 255, 255, 255,        // 8
 35 |   |     255,        255,      255,        LUMA_12x16, 255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 12
 36 |   |     LUMA_16x4, LUMA_16x8, LUMA_16x12, LUMA_16x16, 255, 255,        255, LUMA_16x32, 255, 255, 255, 255,        255, 255, 255, LUMA_16x64, // 16
 37 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 20
 38 |   |     255,        255,      255,        255,        255, 255,        255, LUMA_24x32, 255, 255, 255, 255,        255, 255, 255, 255,        // 24
 39 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 28
 40 |   |     255,        LUMA_32x8, 255,       LUMA_32x16, 255, LUMA_32x24, 255, LUMA_32x32, 255, 255, 255, 255,        255, 255, 255, LUMA_32x64, // 32
 41 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 36
 42 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 40
 43 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 44
 44 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, LUMA_48x64, // 48
 45 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 52
 46 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 56
 47 |   |     255,        255,      255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 60
 48 |   |     255,        255,      255,        LUMA_64x16, 255, 255,        255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64  // 64
 49 |   | };
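The table above maps a (width, height) pair, each a multiple of 4 up to 64, to a LUMA_* partition enum; 255 marks combinations that are not valid HEVC luma partitions. A minimal sketch of the lookup, modeled on the partitionFromSizes() helper declared in primitives.h (the helper name and the absence of its assertion macros are assumptions here):

    // Hedged sketch: index = (width/4 - 1) * 16 + (height/4 - 1), so each row of
    // the table corresponds to one width and each column to one height.
    inline int partitionFromSizes(int width, int height)
    {
        int w = (width  >> 2) - 1;   // 0..15 for widths 4..64
        int h = (height >> 2) - 1;   // 0..15 for heights 4..64
        int part = (int)lumaPartitionMapTable[(w << 4) + h];
        // 255 means this width/height pair has no LUMA_* partition.
        return part;
    }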
 50 |   |
 51 |   | /* the "authoritative" set of encoder primitives */
 52 |   | EncoderPrimitives primitives;
 53 |   |
 54 |   | void setupPixelPrimitives_c(EncoderPrimitives &p);
 55 |   | void setupDCTPrimitives_c(EncoderPrimitives &p);
 56 |   | void setupFilterPrimitives_c(EncoderPrimitives &p);
 57 |   | void setupIntraPrimitives_c(EncoderPrimitives &p);
 58 |   | void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
 59 |   | void setupSaoPrimitives_c(EncoderPrimitives &p);
 60 |   | void setupSeaIntegralPrimitives_c(EncoderPrimitives &p);
 61 |   | void setupLowPassPrimitives_c(EncoderPrimitives& p);
 62 |   |
 63 |   | void setupCPrimitives(EncoderPrimitives &p)
 64 | 0 | {
 65 | 0 |     setupPixelPrimitives_c(p);      // pixel.cpp
 66 | 0 |     setupDCTPrimitives_c(p);        // dct.cpp
 67 | 0 |     setupLowPassPrimitives_c(p);    // lowpassdct.cpp
 68 | 0 |     setupFilterPrimitives_c(p);     // ipfilter.cpp
 69 | 0 |     setupIntraPrimitives_c(p);      // intrapred.cpp
 70 | 0 |     setupLoopFilterPrimitives_c(p); // loopfilter.cpp
 71 | 0 |     setupSaoPrimitives_c(p);        // sao.cpp
 72 | 0 |     setupSeaIntegralPrimitives_c(p);  // framefilter.cpp
 73 | 0 | }
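setupCPrimitives() fills the global function-pointer table with the portable C reference kernels; each setup*_c() helper lives next to the kernels it registers (the file names are given in the comments above). A hedged, heavily simplified sketch of the pattern such a helper follows (illustrative only; the real pixel.cpp registers far more entries via macros, and the kernel names below are assumptions):

    // Hypothetical shape of a setup*_c() helper: assign a C reference kernel
    // to every block size it serves.
    void setupPixelPrimitives_c(EncoderPrimitives &p)
    {
        p.pu[LUMA_4x4].sad  = sad_c<4, 4>;    // 'sad_c' is an assumed template name
        p.pu[LUMA_8x8].sad  = sad_c<8, 8>;
        p.pu[LUMA_4x4].satd = satd_c_4x4;     // assumed C reference function name
        // ... and so on for every LUMA_* partition and primitive kind
    }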
 74 |   |
 75 |   | void enableLowpassDCTPrimitives(EncoderPrimitives &p)
 76 | 0 | {
 77 |   |     // update copies of the standard dct transform
 78 | 0 |     p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct;
 79 | 0 |     p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct;
 80 | 0 |     p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct;
 81 | 0 |     p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct;
 82 |   |
 83 |   |     // replace active dct by lowpass dct for high dct transforms
 84 | 0 |     p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct;
 85 | 0 |     p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct;
 86 | 0 | }
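The function keeps the original transforms reachable through standard_dct before pointing dct at the low-pass variants for the two largest block sizes, so callers can still reach the full-precision transform when they need it. A hedged illustration of what the swap means at call sites (the restore helper shown here is hypothetical and not part of this file):

    // After enableLowpassDCTPrimitives(p):
    //   p.cu[BLOCK_32x32].dct(...)          -> runs the low-pass DCT
    //   p.cu[BLOCK_32x32].standard_dct(...) -> still runs the original DCT
    // A hypothetical inverse, if one ever needed to undo the swap:
    void disableLowpassDCTPrimitives(EncoderPrimitives &p)   // assumption: not in x265
    {
        p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].standard_dct;
        p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].standard_dct;
    }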
 87 |   |
 88 |   | void setupAliasPrimitives(EncoderPrimitives &p)
 89 | 0 | {
 90 |   | #if HIGH_BIT_DEPTH
 91 |   |     /* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */
 92 |   |     for (int i = 0; i < NUM_CU_SIZES; i++)
 93 |   |     {
 94 |   | #if !defined(X265_ARCH_ARM64)
 95 |   |         p.cu[i].sse_pp = (pixel_sse_t)p.cu[i].sse_ss;
 96 |   | #endif
 97 |   |
 98 |   |         p.cu[i].copy_ps = (copy_ps_t)p.pu[i].copy_pp;
 99 |   |         p.cu[i].copy_sp = (copy_sp_t)p.pu[i].copy_pp;
100 |   |         p.cu[i].copy_ss = (copy_ss_t)p.pu[i].copy_pp;
101 |   |
102 |   |         p.chroma[X265_CSP_I420].cu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I420].pu[i].copy_pp;
103 |   |         p.chroma[X265_CSP_I420].cu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I420].pu[i].copy_pp;
104 |   |         p.chroma[X265_CSP_I420].cu[i].copy_ss = (copy_ss_t)p.chroma[X265_CSP_I420].pu[i].copy_pp;
105 |   |
106 |   |         p.chroma[X265_CSP_I422].cu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I422].pu[i].copy_pp;
107 |   |         p.chroma[X265_CSP_I422].cu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I422].pu[i].copy_pp;
108 |   |         p.chroma[X265_CSP_I422].cu[i].copy_ss = (copy_ss_t)p.chroma[X265_CSP_I422].pu[i].copy_pp;
109 |   |     }
110 |   | #endif
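The casts in the block above are only compiled when HIGH_BIT_DEPTH is set, where the pixel type is 16 bits wide, so a copy between pixel planes and a copy between int16_t planes share the same calling convention. A hedged sketch of the typedefs this relies on (recalled from common.h/primitives.h, so treat the exact declarations as assumptions):

    typedef uint16_t pixel;   // assumed HIGH_BIT_DEPTH definition of the pixel type
    typedef void (*copy_pp_t)(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
    typedef void (*copy_ss_t)(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
    // Both signatures move 16-bit samples with the same argument layout, which is why
    // a copy_pp implementation can be reused, via a cast, as copy_ss/copy_sp/copy_ps.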
111 |   |
112 |   |     /* alias chroma 4:4:4 from luma primitives (all but chroma filters) */
113 |   |
114 | 0 |     p.chroma[X265_CSP_I444].cu[BLOCK_4x4].sa8d = NULL;
115 |   |
116 | 0 |     for (int i = 0; i < NUM_PU_SIZES; i++)
117 | 0 |     {
118 | 0 |         p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].copy_pp;
119 | 0 |         p.chroma[X265_CSP_I444].pu[i].addAvg[NONALIGNED]  = p.pu[i].addAvg[NONALIGNED];
120 | 0 |         p.chroma[X265_CSP_I444].pu[i].addAvg[ALIGNED] = p.pu[i].addAvg[ALIGNED];
121 | 0 |         p.chroma[X265_CSP_I444].pu[i].satd    = p.pu[i].satd;
122 | 0 |         p.chroma[X265_CSP_I444].pu[i].p2s[NONALIGNED]     = p.pu[i].convert_p2s[NONALIGNED];
123 | 0 |         p.chroma[X265_CSP_I444].pu[i].p2s[ALIGNED] = p.pu[i].convert_p2s[ALIGNED];
124 | 0 |     }
125 |   |
126 | 0 |     for (int i = 0; i < NUM_CU_SIZES; i++)
127 | 0 |     {
128 | 0 |         p.chroma[X265_CSP_I444].cu[i].sa8d    = p.cu[i].sa8d;
129 | 0 |         p.chroma[X265_CSP_I444].cu[i].sse_pp  = p.cu[i].sse_pp;
130 | 0 |         p.chroma[X265_CSP_I444].cu[i].sub_ps  = p.cu[i].sub_ps;
131 | 0 |         p.chroma[X265_CSP_I444].cu[i].add_ps[NONALIGNED]  = p.cu[i].add_ps[NONALIGNED];
132 | 0 |         p.chroma[X265_CSP_I444].cu[i].add_ps[ALIGNED] = p.cu[i].add_ps[ALIGNED];
133 | 0 |         p.chroma[X265_CSP_I444].cu[i].copy_ps = p.cu[i].copy_ps;
134 | 0 |         p.chroma[X265_CSP_I444].cu[i].copy_sp = p.cu[i].copy_sp;
135 | 0 |         p.chroma[X265_CSP_I444].cu[i].copy_ss = p.cu[i].copy_ss;
136 | 0 |     }
137 |   |
138 | 0 |     p.cu[BLOCK_4x4].sa8d = p.pu[LUMA_4x4].satd;
139 |   |
140 |   |     /* Chroma PU can often use luma satd primitives */
141 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd   = p.pu[LUMA_4x4].satd;
142 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd   = p.pu[LUMA_8x8].satd;
143 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = p.pu[LUMA_16x16].satd;
144 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = p.pu[LUMA_32x32].satd;
145 |   |
146 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd   = p.pu[LUMA_8x4].satd;
147 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd   = p.pu[LUMA_4x8].satd;
148 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd  = p.pu[LUMA_16x8].satd;
149 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd  = p.pu[LUMA_8x16].satd;
150 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = p.pu[LUMA_32x16].satd;
151 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = p.pu[LUMA_16x32].satd;
152 |   |
153 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = p.pu[LUMA_16x12].satd;
154 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = p.pu[LUMA_12x16].satd;
155 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd  = p.pu[LUMA_16x4].satd;
156 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd  = p.pu[LUMA_4x16].satd;
157 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = p.pu[LUMA_32x24].satd;
158 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = p.pu[LUMA_24x32].satd;
159 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd  = p.pu[LUMA_32x8].satd;
160 | 0 |     p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd  = p.pu[LUMA_8x32].satd;
161 |   |
162 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd   = p.pu[LUMA_4x8].satd;
163 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].satd  = p.pu[LUMA_8x16].satd;
164 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].satd = p.pu[LUMA_16x32].satd;
165 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = p.pu[LUMA_32x64].satd;
166 |   |
167 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd   = p.pu[LUMA_4x4].satd;
168 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd   = p.pu[LUMA_8x8].satd;
169 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd  = p.pu[LUMA_4x16].satd;
170 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].satd = p.pu[LUMA_16x16].satd;
171 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].satd  = p.pu[LUMA_8x32].satd;
172 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = p.pu[LUMA_32x32].satd;
173 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].satd = p.pu[LUMA_16x64].satd;
174 |   |
175 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_8x12]  = satd4<8, 12>;
176 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd  = p.pu[LUMA_8x4].satd;
177 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_16x24] = satd8<16, 24>;
178 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_12x32] = satd4<12, 32>;
179 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].satd = p.pu[LUMA_16x8].satd;
180 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_4x32]  = satd4<4, 32>;
181 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_32x48] = satd8<32, 48>;
182 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_24x64] = satd8<24, 64>;
183 | 0 |     p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = p.pu[LUMA_32x16].satd;
184 |   |     //p.chroma[X265_CSP_I422].satd[CHROMA_422_8x64]  = satd8<8, 64>;
185 |   |
186 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_2x2].sa8d = NULL;
187 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].sa8d = p.pu[LUMA_4x4].satd;
188 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].sa8d = p.cu[BLOCK_8x8].sa8d;
189 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sa8d = p.cu[BLOCK_16x16].sa8d;
190 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sa8d = p.cu[BLOCK_32x32].sa8d;
191 |   |
192 | 0 |     p.chroma[X265_CSP_I422].cu[BLOCK_422_2x4].sa8d = NULL;
193 | 0 |     p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sa8d = p.pu[LUMA_4x8].satd;
194 |   |
195 |   |     /* alias CU copy_pp from square PU copy_pp */
196 | 0 |     for (int i = 0; i < NUM_CU_SIZES; i++)
197 | 0 |     {
198 | 0 |         p.cu[i].copy_pp = p.pu[i].copy_pp;
199 |   |
200 | 0 |         for (int c = 0; c < X265_CSP_COUNT; c++)
201 | 0 |             p.chroma[c].cu[i].copy_pp = p.chroma[c].pu[i].copy_pp;
202 | 0 |     }
203 |   |
204 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_2x2].sse_pp = NULL;
205 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].sse_pp = p.cu[BLOCK_4x4].sse_pp;
206 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].sse_pp = p.cu[BLOCK_8x8].sse_pp;
207 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sse_pp = p.cu[BLOCK_16x16].sse_pp;
208 | 0 |     p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = p.cu[BLOCK_32x32].sse_pp;
209 |   |
210 | 0 |     p.chroma[X265_CSP_I422].cu[BLOCK_422_2x4].sse_pp = NULL;
211 | 0 | }
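The aliasing works because a 4:4:4 chroma block has exactly the luma geometry, and a 4:2:0/4:2:2 chroma PU matches some smaller luma PU, so the same satd/sa8d kernels apply; entries for 2x2 and 2x4 chroma blocks are set to NULL because no kernel exists at those sizes. A hedged caller-side sketch of the NULL-check discipline this implies (the helper name and fallback are illustrative, not taken from x265):

    // Hypothetical helper: code that uses an aliased primitive tests the
    // pointer first, since some entries are deliberately left NULL.
    int chromaCuCost(const EncoderPrimitives &p, int csp, int blk,
                     const pixel *fenc, intptr_t fencStride,
                     const pixel *pred, intptr_t predStride)
    {
        if (p.chroma[csp].cu[blk].sa8d)   // NULL for 2x2 / 2x4 chroma blocks
            return p.chroma[csp].cu[blk].sa8d(fenc, fencStride, pred, predStride);
        return 0;                         // assumption: caller handles tiny blocks elsewhere
    }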
212 |   |
213 |   | void x265_report_simd(x265_param* param)
214 | 0 | {
215 | 0 |     if (param->logLevel >= X265_LOG_INFO)
216 | 0 |     {
217 | 0 |         int cpuid = param->cpuid;
218 |   |
219 | 0 |         char buf[1000];
220 | 0 |         char *p = buf + snprintf(buf, sizeof(buf), "using cpu capabilities:");
221 | 0 |         char *none = p;
222 | 0 |         for (int i = 0; X265_NS::cpu_names[i].flags; i++)
223 | 0 |         {
224 | 0 |             if (!strcmp(X265_NS::cpu_names[i].name, "SSE")
225 | 0 |                 && (cpuid & X265_CPU_SSE2))
226 | 0 |                 continue;
227 | 0 |             if (!strcmp(X265_NS::cpu_names[i].name, "SSE2")
228 | 0 |                 && (cpuid & (X265_CPU_SSE2_IS_FAST | X265_CPU_SSE2_IS_SLOW)))
229 | 0 |                 continue;
230 | 0 |             if (!strcmp(X265_NS::cpu_names[i].name, "SSE3")
231 | 0 |                 && (cpuid & X265_CPU_SSSE3 || !(cpuid & X265_CPU_CACHELINE_64)))
232 | 0 |                 continue;
233 | 0 |             if (!strcmp(X265_NS::cpu_names[i].name, "SSE4.1")
234 | 0 |                 && (cpuid & X265_CPU_SSE42))
235 | 0 |                 continue;
236 | 0 |             if (!strcmp(X265_NS::cpu_names[i].name, "BMI1")
237 | 0 |                 && (cpuid & X265_CPU_BMI2))
238 | 0 |                 continue;
239 | 0 |             if ((cpuid & X265_NS::cpu_names[i].flags) == X265_NS::cpu_names[i].flags
240 | 0 |                 && (!i || X265_NS::cpu_names[i].flags != X265_NS::cpu_names[i - 1].flags))
241 | 0 |                 p += snprintf(p, sizeof(buf) - (p - buf), " %s", X265_NS::cpu_names[i].name);
242 | 0 |         }
243 |   |
244 | 0 |         if (p == none)
245 | 0 |             snprintf(p, sizeof(buf) - (p - buf), " none!");
246 | 0 |         x265_log(param, X265_LOG_INFO, "%s\n", buf);
247 | 0 |     }
248 | 0 | }
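The loop walks a sentinel-terminated cpu_names table and skips entries implied by a stronger flag (for example SSE is not printed once SSE2 is set), so the capability line stays short. A hedged sketch of the table shape the loop assumes (the actual definition lives in common/cpu.cpp; the struct layout below is an assumption):

    typedef struct
    {
        char     name[16];   // capability name as printed, e.g. "SSE2"
        uint32_t flags;      // X265_CPU_* bits that must all be present; 0 terminates the table
    } cpu_name_t;
    extern const cpu_name_t cpu_names[];   // assumed declaration matching the loop above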
249 |   |
250 |   | void x265_setup_primitives(x265_param *param)
251 | 0 | {
252 | 0 |     if (!primitives.pu[0].sad)
253 | 0 |     {
254 | 0 |         setupCPrimitives(primitives);
255 |   |
256 |   |         /* We do not want the encoder to use the un-optimized intra all-angles
257 |   |          * C references. It is better to call the individual angle functions
258 |   |          * instead. We must check for NULL before using this primitive */
259 | 0 |         for (int i = 0; i < NUM_TR_SIZE; i++)
260 | 0 |             primitives.cu[i].intra_pred_allangs = NULL;
261 |   |
262 |   | #if ENABLE_ASSEMBLY
263 |   | #if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
264 |   |         setupIntrinsicPrimitives(primitives, param->cpuid);
265 |   | #endif
266 |   |         setupAssemblyPrimitives(primitives, param->cpuid);
267 |   | #endif
268 |   | #if HAVE_ALTIVEC
269 |   |         if (param->cpuid & X265_CPU_ALTIVEC)
270 |   |         {
271 |   |             setupPixelPrimitives_altivec(primitives);       // pixel_altivec.cpp, overwrite the initialization for altivec optimized functions
272 |   |             setupDCTPrimitives_altivec(primitives);         // dct_altivec.cpp, overwrite the initialization for altivec optimized functions
273 |   |             setupFilterPrimitives_altivec(primitives);      // ipfilter.cpp, overwrite the initialization for altivec optimized functions
274 |   |             setupIntraPrimitives_altivec(primitives);       // intrapred_altivec.cpp, overwrite the initialization for altivec optimized functions
275 |   |         }
276 |   | #endif
277 |   |
278 | 0 |         setupAliasPrimitives(primitives);
279 |   |
280 | 0 |         if (param->bLowPassDct)
281 | 0 |         {
282 | 0 |             enableLowpassDCTPrimitives(primitives);
283 | 0 |         }
284 | 0 |     }
285 |   |
286 | 0 |     x265_report_simd(param);
287 | 0 | }
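x265_setup_primitives() is effectively idempotent: the guard on primitives.pu[0].sad populates the table only on the first call (C references first, then intrinsic/assembly overrides, then aliases), and later calls just re-print the SIMD report. A minimal usage sketch via the public parameter API (this assumes x265_param_default() also fills param->cpuid from CPU detection, which is how current x265 builds appear to behave):

    x265_param *param = x265_param_alloc();
    x265_param_default(param);      // assumed to also detect and store param->cpuid
    x265_setup_primitives(param);   // first call: C refs, then ASM overrides, then aliases
    x265_setup_primitives(param);   // later calls only log the capability line again
    x265_param_free(param);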
288 |   | }
289 |   |
290 |   | #if ENABLE_ASSEMBLY && X265_ARCH_X86
291 |   | /* these functions are implemented in assembly. When assembly is not being
292 |   |  * compiled, they are unnecessary and can be NOPs */
293 |   | #else
294 |   | extern "C" {
295 | 0 | int PFX(cpu_cpuid_test)(void) { return 0; }
296 | 0 | void PFX(cpu_emms)(void) {}
297 | 0 | void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; }
298 | 0 | void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {}
299 |   |
300 |   | #if X265_ARCH_ARM == 0
301 | 0 | void PFX(cpu_neon_test)(void) {}
302 | 0 | int PFX(cpu_fast_neon_mrc_test)(void) { return 0; }
303 |   | #endif // X265_ARCH_ARM
304 |   | }
305 |   | #endif
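These stubs are only compiled when the x86 assembly is not, so callers of the cpu_* helpers always have something to link against. PFX() prefixes the symbol with the build's namespace; a hedged reminder of roughly how it expands (the real macro is defined elsewhere in the tree and handles custom namespaces):

    // With the default namespace (X265_NS == x265), PFX(cpu_emms) names the
    // symbol x265_cpu_emms, so the stub satisfies the same linker reference
    // the assembly implementation would.
    #define PFX(name) x265_ ## name   // simplified assumption, not the real macro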