/work/x265/source/common/yuv.cpp
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | |
26 | | #include "common.h" |
27 | | #include "yuv.h" |
28 | | #include "shortyuv.h" |
29 | | #include "picyuv.h" |
30 | | #include "primitives.h" |
31 | 2.44M | #define BUFFER_PADDING 8 |
32 | | |
33 | | using namespace X265_NS; |
34 | | |
35 | | Yuv::Yuv() |
36 | 2.93M | { |
37 | 2.93M | m_buf[0] = NULL; |
38 | 2.93M | m_buf[1] = NULL; |
39 | 2.93M | m_buf[2] = NULL; |
40 | 2.93M | } |
41 | | |
42 | | bool Yuv::create(uint32_t size, int csp) |
43 | 2.46M | { |
44 | 2.46M | m_csp = csp; |
45 | 2.46M | m_hChromaShift = CHROMA_H_SHIFT(csp); |
46 | 2.46M | m_vChromaShift = CHROMA_V_SHIFT(csp); |
47 | | |
48 | 2.46M | m_size = size; |
49 | 2.46M | m_part = partitionFromSizes(size, size); |
50 | | |
51 | 7.39M | for (int i = 0; i < 2; i++) |
52 | 83.8M | for (int j = 0; j < MAX_NUM_REF; j++) |
53 | 1.02G | for (int k = 0; k < INTEGRAL_PLANE_NUM; k++) |
54 | 946M | m_integral[i][j][k] = NULL; |
55 | | |
56 | 2.46M | if (csp == X265_CSP_I400) |
57 | 21.5k | { |
58 | 21.5k | CHECKED_MALLOC(m_buf[0], pixel, size * size + BUFFER_PADDING); |
59 | 21.5k | m_buf[1] = m_buf[2] = 0; |
60 | 21.5k | m_csize = 0; |
61 | 21.5k | return true; |
62 | 21.5k | } |
63 | 2.44M | else |
64 | 2.44M | { |
65 | 2.44M | m_csize = size >> m_hChromaShift; |
66 | | |
67 | 2.44M | size_t sizeL = size * size; |
68 | 2.44M | size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); |
69 | | |
70 | 2.44M | X265_CHECK((sizeC & 15) == 0, "invalid size"); |
71 | 2.44M | size_t totalSize = sizeL + sizeC * 2 + 8 + BUFFER_PADDING; |
72 | | |
73 | | // memory allocation (padded for SIMD reads) |
74 | 2.44M | CHECKED_MALLOC(m_buf[0], pixel, totalSize); |
75 | 2.44M | m_buf[1] = m_buf[0] + sizeL; |
76 | 2.44M | m_buf[2] = m_buf[0] + sizeL + sizeC; |
77 | 2.44M | X265_CHECK(m_buf[2] + sizeC <= m_buf[0] + totalSize, "Buffer overflow detected"); |
78 | 2.44M | return true; |
79 | 2.44M | } |
80 | | |
81 | 0 | fail: |
82 | 0 | return false; |
83 | 2.46M | } |
84 | | |
/* Release the single allocation backing all planes.  m_buf[1] and m_buf[2]
 * point into m_buf[0]'s buffer (or are null), so only m_buf[0] is freed. */
void Yuv::destroy()
{
    X265_FREE(m_buf[0]);
}
89 | | |
90 | | void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const |
91 | 384k | { |
92 | 384k | pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx); |
93 | 384k | primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size); |
94 | 384k | if (m_csp != X265_CSP_I400) |
95 | 384k | { |
96 | 384k | pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx); |
97 | 384k | pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx); |
98 | 384k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize); |
99 | 384k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize); |
100 | 384k | } |
101 | 384k | } |
102 | | |
103 | | void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx) |
104 | 13.7k | { |
105 | 13.7k | const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); |
106 | 13.7k | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride); |
107 | 13.7k | if (m_csp != X265_CSP_I400) |
108 | 13.7k | { |
109 | 13.7k | const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); |
110 | 13.7k | const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); |
111 | 13.7k | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC); |
112 | 13.7k | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC); |
113 | 13.7k | } |
114 | 13.7k | } |
115 | | |
116 | | void Yuv::copyFromYuv(const Yuv& srcYuv) |
117 | 0 | { |
118 | 0 | X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); |
119 | |
|
120 | 0 | primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size); |
121 | 0 | if (m_csp != X265_CSP_I400) |
122 | 0 | { |
123 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize); |
124 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize); |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | | /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */ |
129 | | void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma) |
130 | 0 | { |
131 | 0 | X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n"); |
132 | |
|
133 | 0 | const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size); |
134 | 0 | primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size); |
135 | |
|
136 | 0 | if (bChroma) |
137 | 0 | { |
138 | 0 | const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx); |
139 | 0 | const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx); |
140 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize); |
141 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize); |
142 | 0 | } |
143 | 0 | } |
144 | | |
145 | | void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
146 | 386k | { |
147 | 386k | pixel* dstY = dstYuv.getLumaAddr(absPartIdx); |
148 | 386k | primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size); |
149 | 386k | if (m_csp != X265_CSP_I400) |
150 | 386k | { |
151 | 386k | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
152 | 386k | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
153 | 386k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize); |
154 | 386k | primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize); |
155 | 386k | } |
156 | 386k | } |
157 | | |
158 | | void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
159 | 386k | { |
160 | 386k | pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size); |
161 | 386k | pixel* dstY = dstYuv.m_buf[0]; |
162 | 386k | primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size); |
163 | 386k | if (m_csp != X265_CSP_I400) |
164 | 386k | { |
165 | 386k | pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx); |
166 | 386k | pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx); |
167 | 386k | pixel* dstU = dstYuv.m_buf[1]; |
168 | 386k | pixel* dstV = dstYuv.m_buf[2]; |
169 | 386k | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
170 | 386k | primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
171 | 386k | } |
172 | 386k | } |
173 | | |
174 | | void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp) |
175 | 0 | { |
176 | 0 | primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0], |
177 | 0 | m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); |
178 | 0 | if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400) |
179 | 0 | { |
180 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1], |
181 | 0 | m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); |
182 | 0 | primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2], |
183 | 0 | m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); |
184 | 0 | } |
185 | 0 | if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) |
186 | 0 | { |
187 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize); |
188 | 0 | primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) |
193 | 0 | { |
194 | 0 | int part = partitionFromSizes(width, height); |
195 | |
|
196 | 0 | if (bLuma) |
197 | 0 | { |
198 | 0 | const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx); |
199 | 0 | const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx); |
200 | 0 | pixel* dstY = getLumaAddr(absPartIdx); |
201 | 0 | primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size); |
202 | 0 | } |
203 | 0 | if (bChroma) |
204 | 0 | { |
205 | 0 | const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx); |
206 | 0 | const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx); |
207 | 0 | const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx); |
208 | 0 | const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx); |
209 | 0 | pixel* dstU = getCbAddr(absPartIdx); |
210 | 0 | pixel* dstV = getCrAddr(absPartIdx); |
211 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
212 | 0 | primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const |
217 | 1.57M | { |
218 | 1.57M | const pixel* src = getLumaAddr(absPartIdx); |
219 | 1.57M | pixel* dst = dstYuv.getLumaAddr(absPartIdx); |
220 | 1.57M | primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size); |
221 | 1.57M | } |
222 | | |
223 | | void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const |
224 | 1.05M | { |
225 | 1.05M | const pixel* srcU = getCbAddr(absPartIdx); |
226 | 1.05M | const pixel* srcV = getCrAddr(absPartIdx); |
227 | 1.05M | pixel* dstU = dstYuv.getCbAddr(absPartIdx); |
228 | 1.05M | pixel* dstV = dstYuv.getCrAddr(absPartIdx); |
229 | 1.05M | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize); |
230 | 1.05M | primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize); |
231 | 1.05M | } |