/src/x265/source/common/yuv.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | |
26 | | #include "common.h" |
27 | | #include "yuv.h" |
28 | | #include "shortyuv.h" |
29 | | #include "picyuv.h" |
30 | | #include "primitives.h" |
31 | | |
32 | | using namespace X265_NS; |
33 | | |
34 | | Yuv::Yuv() |
35 | 0 | { |
36 | 0 | m_buf[0] = NULL; |
37 | 0 | m_buf[1] = NULL; |
38 | 0 | m_buf[2] = NULL; |
39 | 0 | } |
40 | | |
41 | | bool Yuv::create(uint32_t size, int csp) |
42 | 0 | { |
43 | 0 | m_csp = csp; |
44 | 0 | m_hChromaShift = CHROMA_H_SHIFT(csp); |
45 | 0 | m_vChromaShift = CHROMA_V_SHIFT(csp); |
46 | |
|
47 | 0 | m_size = size; |
48 | 0 | m_part = partitionFromSizes(size, size); |
49 | |
|
50 | 0 | for (int i = 0; i < 2; i++) |
51 | 0 | for (int j = 0; j < MAX_NUM_REF; j++) |
52 | 0 | for (int k = 0; k < INTEGRAL_PLANE_NUM; k++) |
53 | 0 | m_integral[i][j][k] = NULL; |
54 | |
|
55 | 0 | if (csp == X265_CSP_I400) |
56 | 0 | { |
57 | 0 | CHECKED_MALLOC(m_buf[0], pixel, size * size + 8); |
58 | 0 | m_buf[1] = m_buf[2] = 0; |
59 | 0 | m_csize = 0; |
60 | 0 | return true; |
61 | 0 | } |
62 | 0 | else |
63 | 0 | { |
64 | 0 | m_csize = size >> m_hChromaShift; |
65 | |
|
66 | 0 | size_t sizeL = size * size; |
67 | 0 | size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); |
68 | |
|
69 | 0 | X265_CHECK((sizeC & 15) == 0, "invalid size"); |
70 | | |
71 | | // memory allocation (padded for SIMD reads) |
72 | 0 | CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8); |
73 | 0 | m_buf[1] = m_buf[0] + sizeL; |
74 | 0 | m_buf[2] = m_buf[0] + sizeL + sizeC; |
75 | 0 | return true; |
76 | 0 | } |
77 | | |
78 | 0 | fail: |
79 | 0 | return false; |
80 | 0 | } |
81 | | |
82 | | void Yuv::destroy() |
83 | 0 | { |
84 | 0 | X265_FREE(m_buf[0]); |
85 | 0 | } |
86 | | |
87 | | void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const |
88 | 0 | { |
89 | 0 | pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx); |
90 | 0 | primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size); |
91 | 0 | if (m_csp != X265_CSP_I400) |
92 | 0 | { |
93 | 0 | pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx); |
94 | 0 | pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx); |
95 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize); |
96 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize); |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | | void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx) |
101 | 0 | { |
102 | 0 | const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); |
103 | 0 | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride); |
104 | 0 | if (m_csp != X265_CSP_I400) |
105 | 0 | { |
106 | 0 | const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); |
107 | 0 | const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); |
108 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC); |
109 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC); |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | | void Yuv::copyFromYuv(const Yuv& srcYuv) |
114 | 0 | { |
115 | 0 | X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); |
116 | |
|
117 | 0 | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size); |
118 | 0 | if (m_csp != X265_CSP_I400) |
119 | 0 | { |
120 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize); |
121 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */ |
126 | | void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma) |
127 | 0 | { |
128 | 0 | X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n"); |
129 | |
|
130 | 0 | const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size); |
131 | 0 | primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size); |
132 | |
|
133 | 0 | if (bChroma) |
134 | 0 | { |
135 | 0 | const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx); |
136 | 0 | const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx); |
137 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize); |
138 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize); |
139 | 0 | } |
140 | 0 | } |
141 | | |
142 | | void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
143 | 0 | { |
144 | 0 | pixel* dstY = dstYuv.getLumaAddr(absPartIdx); |
145 | 0 | primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size); |
146 | 0 | if (m_csp != X265_CSP_I400) |
147 | 0 | { |
148 | 0 | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
149 | 0 | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
150 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize); |
151 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize); |
152 | 0 | } |
153 | 0 | } |
154 | | |
155 | | void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
156 | 0 | { |
157 | 0 | pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size); |
158 | 0 | pixel* dstY = dstYuv.m_buf[0]; |
159 | 0 | primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size); |
160 | 0 | if (m_csp != X265_CSP_I400) |
161 | 0 | { |
162 | 0 | pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx); |
163 | 0 | pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx); |
164 | 0 | pixel* dstU = dstYuv.m_buf[1]; |
165 | 0 | pixel* dstV = dstYuv.m_buf[2]; |
166 | 0 | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
167 | 0 | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
168 | 0 | } |
169 | 0 | } |
170 | | |
171 | | void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp) |
172 | 0 | { |
173 | 0 | primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0], |
174 | 0 | m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); |
175 | 0 | if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400) |
176 | 0 | { |
177 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1], |
178 | 0 | m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); |
179 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2], |
180 | 0 | m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); |
181 | 0 | } |
182 | 0 | if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) |
183 | 0 | { |
184 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize); |
185 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize); |
186 | 0 | } |
187 | 0 | } |
188 | | |
189 | | void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) |
190 | 0 | { |
191 | 0 | int part = partitionFromSizes(width, height); |
192 | |
|
193 | 0 | if (bLuma) |
194 | 0 | { |
195 | 0 | const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx); |
196 | 0 | const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx); |
197 | 0 | pixel* dstY = getLumaAddr(absPartIdx); |
198 | 0 | primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size); |
199 | 0 | } |
200 | 0 | if (bChroma) |
201 | 0 | { |
202 | 0 | const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx); |
203 | 0 | const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx); |
204 | 0 | const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx); |
205 | 0 | const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx); |
206 | 0 | pixel* dstU = getCbAddr(absPartIdx); |
207 | 0 | pixel* dstV = getCrAddr(absPartIdx); |
208 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
209 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | | void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const |
214 | 0 | { |
215 | 0 | const pixel* src = getLumaAddr(absPartIdx); |
216 | 0 | pixel* dst = dstYuv.getLumaAddr(absPartIdx); |
217 | 0 | primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size); |
218 | 0 | } |
219 | | |
220 | | void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const |
221 | 0 | { |
222 | 0 | const pixel* srcU = getCbAddr(absPartIdx); |
223 | 0 | const pixel* srcV = getCrAddr(absPartIdx); |
224 | 0 | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
225 | 0 | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
226 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
227 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
228 | 0 | } |