Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/yuv.cpp
Line
Count
Source
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
26
#include "common.h"
27
#include "yuv.h"
28
#include "shortyuv.h"
29
#include "picyuv.h"
30
#include "primitives.h"
31
2.44M
#define BUFFER_PADDING 8
32
33
using namespace X265_NS;
34
35
Yuv::Yuv()
36
2.93M
{
37
2.93M
    m_buf[0] = NULL;
38
2.93M
    m_buf[1] = NULL;
39
2.93M
    m_buf[2] = NULL;
40
2.93M
}
41
42
bool Yuv::create(uint32_t size, int csp)
43
2.46M
{
44
2.46M
    m_csp = csp;
45
2.46M
    m_hChromaShift = CHROMA_H_SHIFT(csp);
46
2.46M
    m_vChromaShift = CHROMA_V_SHIFT(csp);
47
48
2.46M
    m_size  = size;
49
2.46M
    m_part = partitionFromSizes(size, size);
50
51
7.39M
    for (int i = 0; i < 2; i++)
52
83.8M
        for (int j = 0; j < MAX_NUM_REF; j++)
53
1.02G
            for (int k = 0; k < INTEGRAL_PLANE_NUM; k++)
54
946M
                m_integral[i][j][k] = NULL;
55
56
2.46M
    if (csp == X265_CSP_I400)
57
21.5k
    {
58
21.5k
        CHECKED_MALLOC(m_buf[0], pixel, size * size + BUFFER_PADDING);
59
21.5k
        m_buf[1] = m_buf[2] = 0;
60
21.5k
        m_csize = 0;
61
21.5k
        return true;
62
21.5k
    }
63
2.44M
    else
64
2.44M
    {
65
2.44M
        m_csize = size >> m_hChromaShift;
66
67
2.44M
        size_t sizeL = size * size;
68
2.44M
        size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
69
70
2.44M
        X265_CHECK((sizeC & 15) == 0, "invalid size");
71
2.44M
        size_t totalSize = sizeL + sizeC * 2 + 8 + BUFFER_PADDING;
72
73
        // memory allocation (padded for SIMD reads)
74
2.44M
        CHECKED_MALLOC(m_buf[0], pixel, totalSize);
75
2.44M
        m_buf[1] = m_buf[0] + sizeL;
76
2.44M
        m_buf[2] = m_buf[0] + sizeL + sizeC;
77
2.44M
        X265_CHECK(m_buf[2] + sizeC <= m_buf[0] + totalSize, "Buffer overflow detected");
78
2.44M
        return true;
79
2.44M
    }
80
81
0
fail:
82
0
    return false;
83
2.46M
}
84
85
void Yuv::destroy()
86
2.46M
{
87
2.46M
    X265_FREE(m_buf[0]);
88
2.46M
}
89
90
void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
91
384k
{
92
384k
    pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
93
384k
    primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
94
384k
    if (m_csp != X265_CSP_I400)
95
384k
    {
96
384k
        pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
97
384k
        pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
98
384k
        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
99
384k
        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
100
384k
    }
101
384k
}
102
103
void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
104
13.7k
{
105
13.7k
    const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
106
13.7k
    primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
107
13.7k
    if (m_csp != X265_CSP_I400)
108
13.7k
    {
109
13.7k
        const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
110
13.7k
        const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
111
13.7k
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
112
13.7k
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
113
13.7k
    }
114
13.7k
}
115
116
void Yuv::copyFromYuv(const Yuv& srcYuv)
117
0
{
118
0
    X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
119
120
0
    primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
121
0
    if (m_csp != X265_CSP_I400)
122
0
    {
123
0
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
124
0
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
125
0
    }
126
0
}
127
128
/* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
129
void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
130
0
{
131
0
    X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
132
133
0
    const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
134
0
    primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size);
135
136
0
    if (bChroma)
137
0
    {
138
0
        const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
139
0
        const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
140
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize);
141
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize);
142
0
    }
143
0
}
144
145
void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
146
386k
{
147
386k
    pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
148
386k
    primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
149
386k
    if (m_csp != X265_CSP_I400)
150
386k
    {
151
386k
        pixel* dstU = dstYuv.getCbAddr(absPartIdx);
152
386k
        pixel* dstV = dstYuv.getCrAddr(absPartIdx);
153
386k
        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
154
386k
        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
155
386k
    }
156
386k
}
157
158
void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
159
386k
{
160
386k
    pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
161
386k
    pixel* dstY = dstYuv.m_buf[0];
162
386k
    primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size);
163
386k
    if (m_csp != X265_CSP_I400)
164
386k
    {
165
386k
        pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
166
386k
        pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
167
386k
        pixel* dstU = dstYuv.m_buf[1];
168
386k
        pixel* dstV = dstYuv.m_buf[2];
169
386k
        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
170
386k
        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
171
386k
    }
172
386k
}
173
174
void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp)
175
0
{
176
0
    primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0],
177
0
                                         m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
178
0
    if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400)
179
0
    {
180
0
        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1],
181
0
                                                           m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
182
0
        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2],
183
0
                                                           m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
184
0
    }
185
0
    if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
186
0
    {
187
0
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize);
188
0
        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize);
189
0
    }
190
0
}
191
192
void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
193
0
{
194
0
    int part = partitionFromSizes(width, height);
195
196
0
    if (bLuma)
197
0
    {
198
0
        const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
199
0
        const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
200
0
        pixel* dstY = getLumaAddr(absPartIdx);
201
0
        primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
202
0
    }
203
0
    if (bChroma)
204
0
    {
205
0
        const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
206
0
        const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
207
0
        const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
208
0
        const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
209
0
        pixel* dstU = getCbAddr(absPartIdx);
210
0
        pixel* dstV = getCrAddr(absPartIdx);
211
0
        primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
212
0
        primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
213
0
    }
214
0
}
215
216
void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
217
1.57M
{
218
1.57M
    const pixel* src = getLumaAddr(absPartIdx);
219
1.57M
    pixel* dst = dstYuv.getLumaAddr(absPartIdx);
220
1.57M
    primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size);
221
1.57M
}
222
223
void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
224
1.05M
{
225
1.05M
    const pixel* srcU = getCbAddr(absPartIdx);
226
1.05M
    const pixel* srcV = getCrAddr(absPartIdx);
227
1.05M
    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
228
1.05M
    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
229
1.05M
    primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
230
1.05M
    primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
231
1.05M
}