/src/x265/source/common/yuv.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | |
26 | | #include "common.h" |
27 | | #include "yuv.h" |
28 | | #include "shortyuv.h" |
29 | | #include "picyuv.h" |
30 | | #include "primitives.h" |
31 | | |
32 | | using namespace X265_NS; |
33 | | |
34 | | Yuv::Yuv() |
35 | 3.12M | { |
36 | 3.12M | m_buf[0] = NULL; |
37 | 3.12M | m_buf[1] = NULL; |
38 | 3.12M | m_buf[2] = NULL; |
39 | 3.12M | } |
40 | | |
41 | | bool Yuv::create(uint32_t size, int csp) |
42 | 2.67M | { |
43 | 2.67M | m_csp = csp; |
44 | 2.67M | m_hChromaShift = CHROMA_H_SHIFT(csp); |
45 | 2.67M | m_vChromaShift = CHROMA_V_SHIFT(csp); |
46 | | |
47 | 2.67M | m_size = size; |
48 | 2.67M | m_part = partitionFromSizes(size, size); |
49 | | |
50 | 8.01M | for (int i = 0; i < 2; i++) |
51 | 90.8M | for (int j = 0; j < MAX_NUM_REF; j++) |
52 | 1.11G | for (int k = 0; k < INTEGRAL_PLANE_NUM; k++) |
53 | 1.02G | m_integral[i][j][k] = NULL; |
54 | | |
55 | 2.67M | if (csp == X265_CSP_I400) |
56 | 23.0k | { |
57 | 23.0k | CHECKED_MALLOC(m_buf[0], pixel, size * size + 8); |
58 | 23.0k | m_buf[1] = m_buf[2] = 0; |
59 | 23.0k | m_csize = 0; |
60 | 23.0k | return true; |
61 | 23.0k | } |
62 | 2.64M | else |
63 | 2.64M | { |
64 | 2.64M | m_csize = size >> m_hChromaShift; |
65 | | |
66 | 2.64M | size_t sizeL = size * size; |
67 | 2.64M | size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); |
68 | | |
69 | 2.64M | X265_CHECK((sizeC & 15) == 0, "invalid size"); |
70 | | |
71 | | // memory allocation (padded for SIMD reads) |
72 | 2.64M | CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8); |
73 | 2.64M | m_buf[1] = m_buf[0] + sizeL; |
74 | 2.64M | m_buf[2] = m_buf[0] + sizeL + sizeC; |
75 | 2.64M | return true; |
76 | 2.64M | } |
77 | | |
78 | 0 | fail: |
79 | 0 | return false; |
80 | 2.67M | } |
81 | | |
82 | | void Yuv::destroy() |
83 | 2.67M | { |
84 | 2.67M | X265_FREE(m_buf[0]); |
85 | 2.67M | } |
86 | | |
87 | | void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const |
88 | 436k | { |
89 | 436k | pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx); |
90 | 436k | primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size); |
91 | 436k | if (m_csp != X265_CSP_I400) |
92 | 436k | { |
93 | 436k | pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx); |
94 | 436k | pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx); |
95 | 436k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize); |
96 | 436k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize); |
97 | 436k | } |
98 | 436k | } |
99 | | |
100 | | void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx) |
101 | 13.9k | { |
102 | 13.9k | const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); |
103 | 13.9k | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride); |
104 | 13.9k | if (m_csp != X265_CSP_I400) |
105 | 13.9k | { |
106 | 13.9k | const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); |
107 | 13.9k | const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); |
108 | 13.9k | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC); |
109 | 13.9k | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC); |
110 | 13.9k | } |
111 | 13.9k | } |
112 | | |
113 | | void Yuv::copyFromYuv(const Yuv& srcYuv) |
114 | 0 | { |
115 | 0 | X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); |
116 | |
|
117 | 0 | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size); |
118 | 0 | if (m_csp != X265_CSP_I400) |
119 | 0 | { |
120 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize); |
121 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */ |
126 | | void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma) |
127 | 0 | { |
128 | 0 | X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n"); |
129 | |
|
130 | 0 | const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size); |
131 | 0 | primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size); |
132 | |
|
133 | 0 | if (bChroma) |
134 | 0 | { |
135 | 0 | const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx); |
136 | 0 | const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx); |
137 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize); |
138 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize); |
139 | 0 | } |
140 | 0 | } |
141 | | |
142 | | void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
143 | 440k | { |
144 | 440k | pixel* dstY = dstYuv.getLumaAddr(absPartIdx); |
145 | 440k | primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size); |
146 | 440k | if (m_csp != X265_CSP_I400) |
147 | 440k | { |
148 | 440k | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
149 | 440k | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
150 | 440k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize); |
151 | 440k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize); |
152 | 440k | } |
153 | 440k | } |
154 | | |
155 | | void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
156 | 440k | { |
157 | 440k | pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size); |
158 | 440k | pixel* dstY = dstYuv.m_buf[0]; |
159 | 440k | primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size); |
160 | 440k | if (m_csp != X265_CSP_I400) |
161 | 440k | { |
162 | 440k | pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx); |
163 | 440k | pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx); |
164 | 440k | pixel* dstU = dstYuv.m_buf[1]; |
165 | 440k | pixel* dstV = dstYuv.m_buf[2]; |
166 | 440k | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
167 | 440k | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
168 | 440k | } |
169 | 440k | } |
170 | | |
171 | | void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp) |
172 | 0 | { |
173 | 0 | primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0], |
174 | 0 | m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); |
175 | 0 | if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400) |
176 | 0 | { |
177 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1], |
178 | 0 | m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); |
179 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2], |
180 | 0 | m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); |
181 | 0 | } |
182 | 0 | if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) |
183 | 0 | { |
184 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize); |
185 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize); |
186 | 0 | } |
187 | 0 | } |
188 | | |
189 | | void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) |
190 | 0 | { |
191 | 0 | int part = partitionFromSizes(width, height); |
192 | |
|
193 | 0 | if (bLuma) |
194 | 0 | { |
195 | 0 | const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx); |
196 | 0 | const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx); |
197 | 0 | pixel* dstY = getLumaAddr(absPartIdx); |
198 | 0 | primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size); |
199 | 0 | } |
200 | 0 | if (bChroma) |
201 | 0 | { |
202 | 0 | const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx); |
203 | 0 | const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx); |
204 | 0 | const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx); |
205 | 0 | const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx); |
206 | 0 | pixel* dstU = getCbAddr(absPartIdx); |
207 | 0 | pixel* dstV = getCrAddr(absPartIdx); |
208 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
209 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | | void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const |
214 | 1.79M | { |
215 | 1.79M | const pixel* src = getLumaAddr(absPartIdx); |
216 | 1.79M | pixel* dst = dstYuv.getLumaAddr(absPartIdx); |
217 | 1.79M | primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size); |
218 | 1.79M | } |
219 | | |
220 | | void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const |
221 | 1.18M | { |
222 | 1.18M | const pixel* srcU = getCbAddr(absPartIdx); |
223 | 1.18M | const pixel* srcV = getCrAddr(absPartIdx); |
224 | 1.18M | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
225 | 1.18M | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
226 | 1.18M | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
227 | 1.18M | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
228 | 1.18M | } |