/src/librawspeed/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | RawSpeed - RAW file decoder. |
3 | | |
4 | | Copyright (C) 2009-2014 Klaus Post |
5 | | Copyright (C) 2015-2017 Roman Lebedev |
6 | | |
7 | | This library is free software; you can redistribute it and/or |
8 | | modify it under the terms of the GNU Lesser General Public |
9 | | License as published by the Free Software Foundation; either |
10 | | version 2 of the License, or (at your option) any later version. |
11 | | |
12 | | This library is distributed in the hope that it will be useful, |
13 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | Lesser General Public License for more details. |
16 | | |
17 | | You should have received a copy of the GNU Lesser General Public |
18 | | License along with this library; if not, write to the Free Software |
19 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "rawspeedconfig.h" |
23 | | #include "interpolators/Cr2sRawInterpolator.h" |
24 | | #include "adt/Array2DRef.h" |
25 | | #include "adt/Bit.h" |
26 | | #include "adt/CroppedArray1DRef.h" |
27 | | #include "adt/Invariant.h" |
28 | | #include "adt/Point.h" |
29 | | #include "common/Common.h" |
30 | | #include "common/RawImage.h" |
31 | | #include "decoders/RawDecoderException.h" |
32 | | #include <array> |
33 | | #include <cstdint> |
34 | | |
35 | | namespace rawspeed { |
36 | | |
37 | | struct Cr2sRawInterpolator::YCbCr final { |
38 | | int Y = 0; |
39 | | int Cb = 0; |
40 | | int Cr = 0; |
41 | | |
42 | 0 | static void LoadY(YCbCr* p, const CroppedArray1DRef<const uint16_t> in) { |
43 | 0 | invariant(p); |
44 | 0 | invariant(in.size() == 1); |
45 | | |
46 | 0 | p->Y = in(0); |
47 | 0 | } |
48 | | |
49 | 0 | static void LoadCbCr(YCbCr* p, const CroppedArray1DRef<const uint16_t> in) { |
50 | 0 | invariant(p); |
51 | 0 | invariant(in.size() == 2); |
52 | | |
53 | 0 | p->Cb = in(0); |
54 | 0 | p->Cr = in(1); |
55 | 0 | } |
56 | | |
57 | 0 | static void CopyCbCr(YCbCr* p, const YCbCr& pSrc) { |
58 | 0 | invariant(p); |
59 | | |
60 | 0 | p->Cb = pSrc.Cb; |
61 | 0 | p->Cr = pSrc.Cr; |
62 | 0 | } |
63 | | |
64 | 0 | YCbCr() = default; |
65 | | |
66 | 0 | void signExtend() { |
67 | 0 | Cb -= 16384; |
68 | 0 | Cr -= 16384; |
69 | 0 | } |
70 | | |
71 | 0 | void applyHue(int hue_) { |
72 | 0 | Cb += hue_; |
73 | 0 | Cr += hue_; |
74 | 0 | } |
75 | | |
76 | 0 | void process(int hue_) { |
77 | 0 | signExtend(); |
78 | 0 | applyHue(hue_); |
79 | 0 | } |
80 | | |
81 | 0 | void interpolateCbCr(const YCbCr& p0, const YCbCr& p2) { |
82 | | // Y is already good, need to interpolate Cb and Cr |
83 | | // FIXME: dcraw does +1 before >> 1 |
84 | 0 | Cb = (p0.Cb + p2.Cb) >> 1; |
85 | 0 | Cr = (p0.Cr + p2.Cr) >> 1; |
86 | 0 | } |
87 | | |
88 | | void interpolateCbCr(const YCbCr& p0, const YCbCr& p1, const YCbCr& p2, |
89 | 0 | const YCbCr& p3) { |
90 | | // Y is already good, need to interpolate Cb and Cr |
91 | | // FIXME: dcraw does +1 before >> 1 |
92 | 0 | Cb = (p0.Cb + p1.Cb + p2.Cb + p3.Cb) >> 2; |
93 | 0 | Cr = (p0.Cr + p1.Cr + p2.Cr + p3.Cr) >> 2; |
94 | 0 | } |
95 | | }; |
96 | | |
97 | 0 | template <int version> void Cr2sRawInterpolator::interpolate_422_row(int row) { |
98 | 0 | const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef()); |
99 | |
|
100 | 0 | constexpr int InputComponentsPerMCU = 4; |
101 | 0 | constexpr int PixelsPerMCU = 2; |
102 | 0 | constexpr int YsPerMCU = PixelsPerMCU; |
103 | 0 | constexpr int ComponentsPerPixel = 3; |
104 | 0 | constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU; |
105 | |
|
106 | 0 | invariant(input.width() % InputComponentsPerMCU == 0); |
107 | 0 | int numMCUs = input.width() / InputComponentsPerMCU; |
108 | 0 | invariant(numMCUs > 1); |
109 | | |
110 | 0 | using MCUTy = std::array<YCbCr, PixelsPerMCU>; |
111 | |
|
112 | 0 | auto LoadMCU = [input_ = input, row](int MCUIdx) { |
113 | 0 | MCUTy MCU; |
114 | 0 | for (int YIdx = 0; YIdx < PixelsPerMCU; ++YIdx) |
115 | 0 | YCbCr::LoadY(&MCU[YIdx], input_[row].getCrop( |
116 | 0 | InputComponentsPerMCU * MCUIdx + YIdx, 1)); |
117 | 0 | YCbCr::LoadCbCr(&MCU[0], input_[row].getCrop( |
118 | 0 | InputComponentsPerMCU * MCUIdx + YsPerMCU, 2)); |
119 | 0 | return MCU; |
120 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int)::{lambda(int)#1}::operator()(int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int)::{lambda(int)#1}::operator()(int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int)::{lambda(int)#1}::operator()(int) const |
121 | 0 | auto StoreMCU = [this, out, row](const MCUTy& MCU, int MCUIdx) { |
122 | 0 | for (int Pixel = 0; Pixel < PixelsPerMCU; ++Pixel) { |
123 | 0 | YUV_TO_RGB<version>(MCU[Pixel], |
124 | 0 | out[row].getCrop(OutputComponentsPerMCU * MCUIdx + |
125 | 0 | ComponentsPerPixel * Pixel, |
126 | 0 | 3)); |
127 | 0 | } |
128 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const |
129 | | |
130 | | // The packed input format is: |
131 | | // p0 p1 p0 p0 p2 p3 p2 p2 |
132 | | // [ Y1 Y2 Cb Cr ] [ Y1 Y2 Cb Cr ] ... |
133 | | // in unpacked form that is: |
134 | | // p0 p1 p2 p3 |
135 | | // [ Y1 Cb Cr ] [ Y2 ... ... ] [ Y1 Cb Cr ] [ Y2 ... ... ] ... |
136 | | // i.e. even pixels are full, odd pixels need interpolation: |
137 | | // p0 p1 p2 p3 |
138 | | // [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
139 | | // for last (odd) pixel of the line, just keep Cb/Cr from previous pixel |
140 | | // see http://lclevy.free.fr/cr2/#sraw |
141 | |
|
142 | 0 | int MCUIdx; |
143 | | // Process all MCU's except the last one. |
144 | 0 | for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) { |
145 | 0 | invariant(MCUIdx + 1 <= numMCUs); |
146 | | |
147 | | // For 4:2:2, one MCU encodes 2 pixels, and odd pixels need interpolation, |
148 | | // so we need to load three pixels, and thus we must load 2 MCU's. |
149 | 0 | std::array<MCUTy, 2> MCUs; |
150 | 0 | for (int SubMCUIdx = 0; static_cast<unsigned>(SubMCUIdx) < MCUs.size(); |
151 | 0 | ++SubMCUIdx) |
152 | 0 | MCUs[SubMCUIdx] = LoadMCU(MCUIdx + SubMCUIdx); |
153 | | |
154 | | // Process first pixel, which is full |
155 | 0 | MCUs[0][0].process(hue); |
156 | | // Process third pixel, which is, again, full |
157 | 0 | MCUs[1][0].process(hue); |
158 | | // Interpolate the middle pixel, for which only the Y was known. |
159 | 0 | MCUs[0][1].interpolateCbCr(MCUs[0][0], MCUs[1][0]); |
160 | | |
161 | | // And finally, store the first MCU, i.e. first two pixels. |
162 | 0 | StoreMCU(MCUs[0], MCUIdx); |
163 | 0 | } |
164 | | |
165 | 0 | invariant(MCUIdx + 1 == numMCUs); |
166 | | |
167 | | // Last two pixels, the packed input format is: |
168 | | // p0 p1 p0 p0 |
169 | | // .. [ Y1 Y2 Cb Cr ] |
170 | | // in unpacked form that is: |
171 | | // p0 p1 |
172 | | // .. [ Y1 Cb Cr ] [ Y2 ... ... ] |
173 | | |
174 | 0 | MCUTy MCU = LoadMCU(MCUIdx); |
175 | |
|
176 | 0 | MCU[0].process(hue); |
177 | 0 | YCbCr::CopyCbCr(&MCU[1], MCU[0]); |
178 | |
|
179 | 0 | StoreMCU(MCU, MCUIdx); |
180 | 0 | } Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int) Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int) Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int) |
181 | | |
182 | 0 | template <int version> void Cr2sRawInterpolator::interpolate_422() { |
183 | 0 | const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef()); |
184 | 0 | invariant(out.width() > 0); |
185 | 0 | invariant(out.height() > 0); |
186 | | |
187 | | // Benchmarking suggests that for real-world usage, it is not beneficial to |
188 | | // parallelize this, and in fact leads to worse performance. |
189 | 0 | for (int row = 0; row < out.height(); row++) |
190 | 0 | interpolate_422_row<version>(row); |
191 | 0 | } Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<0>() Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<1>() Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<2>() |
192 | | |
193 | 0 | template <int version> void Cr2sRawInterpolator::interpolate_420_row(int row) { |
194 | 0 | const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef()); |
195 | |
|
196 | 0 | constexpr int X_S_F = 2; |
197 | 0 | constexpr int Y_S_F = 2; |
198 | 0 | constexpr int PixelsPerMCU = X_S_F * Y_S_F; |
199 | 0 | constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU; |
200 | |
|
201 | 0 | constexpr int YsPerMCU = PixelsPerMCU; |
202 | 0 | constexpr int ComponentsPerPixel = 3; |
203 | 0 | constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU; |
204 | |
|
205 | 0 | invariant(input.width() % InputComponentsPerMCU == 0); |
206 | 0 | int numMCUs = input.width() / InputComponentsPerMCU; |
207 | 0 | invariant(numMCUs > 1); |
208 | | |
209 | 0 | using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>; |
210 | |
|
211 | 0 | auto LoadMCU = [input_ = input](int Row, int MCUIdx) |
212 | 0 | __attribute__((always_inline)) { |
213 | 0 | MCUTy MCU; |
214 | 0 | for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) { |
215 | 0 | for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) { |
216 | 0 | YCbCr::LoadY(&MCU[MCURow][MCUCol], |
217 | 0 | input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + |
218 | 0 | X_S_F * MCURow + MCUCol, |
219 | 0 | 1)); |
220 | 0 | } |
221 | 0 | } |
222 | 0 | YCbCr::LoadCbCr( |
223 | 0 | &MCU[0][0], |
224 | 0 | input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + YsPerMCU, 2)); |
225 | 0 | return MCU; |
226 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int)::{lambda(int, int)#1}::operator()(int, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int)::{lambda(int, int)#1}::operator()(int, int) const |
227 | 0 | auto StoreMCU = [ this, out ](const MCUTy& MCU, int MCUIdx, int Row) |
228 | 0 | __attribute__((always_inline)) { |
229 | 0 | for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) { |
230 | 0 | for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) { |
231 | 0 | YUV_TO_RGB<version>(MCU[MCURow][MCUCol], |
232 | 0 | out[2 * Row + MCURow].getCrop( |
233 | 0 | ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) + |
234 | 0 | ComponentsPerPixel * MCUCol, |
235 | 0 | 3)); |
236 | 0 | } |
237 | 0 | } |
238 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int)::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int)::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const |
239 | |
|
240 | 0 | invariant(row + 1 <= input.height()); |
241 | | |
242 | | // The packed input format is: |
243 | | // p0 p1 p2 p3 p0 p0 p4 p5 p6 p7 p4 p4 |
244 | | // row 0: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ... |
245 | | // row 1: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ... |
246 | | // .. .. .. .. . . .. .. .. .. . . |
247 | | // in unpacked form that is: |
248 | | // p0 p1 p2 p3 |
249 | | // row 0: [ Y1 Cb Cr ] [ Y2 ... ... ] [ Y1 Cb Cr ] [ Y2 ... ... ] ... |
250 | | // row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ... |
251 | | // row 2: [ Y1 Cb Cr ] [ Y2 ... ... ] [ Y1 Cb Cr ] [ Y2 ... ... ] ... |
252 | | // row 3: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ... |
253 | | // .. . . .. . . .. . . .. . . |
254 | | // i.e. on even rows, even pixels are full, rest of pixels need interpolation |
255 | | // first, on even rows, odd pixels are interpolated using 422 algo (marked *) |
256 | | // p0 p1 p2 p3 |
257 | | // row 0: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
258 | | // row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ... |
259 | | // row 2: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
260 | | // row 3: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ... |
261 | | // .. . . .. . . .. . . |
262 | | // then, on odd rows, even pixels are interpolated (marked with #) |
263 | | // p0 p1 p2 p3 |
264 | | // row 0: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
265 | | // row 1: [ Y3 Cb# Cr# ] [ Y4 ... ... ] [ Y3 Cb# Cr# ] [ Y4 ... ... ] ... |
266 | | // row 2: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
267 | | // row 3: [ Y3 Cb# Cr# ] [ Y4 ... ... ] [ Y3 Cb# Cr# ] [ Y4 ... ... ] ... |
268 | | // .. . . .. . . .. . . |
269 | | // and finally, on odd rows, odd pixels are interpolated from * (marked $) |
270 | | // p0 p1 p2 p3 |
271 | | // row 0: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
272 | | // row 1: [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] ... |
273 | | // row 2: [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] [ Y1 Cb Cr ] [ Y2 Cb* Cr* ] ... |
274 | | // row 3: [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] ... |
275 | | // .. . . .. . . .. . . |
276 | | // see http://lclevy.free.fr/cr2/#sraw |
277 | | |
278 | 0 | int MCUIdx; |
279 | 0 | for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) { |
280 | 0 | invariant(MCUIdx + 1 <= numMCUs); |
281 | | |
282 | | // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need |
283 | | // interpolation, so we need to load eight pixels, |
284 | | // and thus we must load 4 MCU's. |
285 | 0 | std::array<std::array<MCUTy, 2>, 2> MCUs; |
286 | 0 | for (int Row = 0; Row < 2; ++Row) |
287 | 0 | for (int Col = 0; Col < 2; ++Col) |
288 | 0 | MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col); |
289 | | |
290 | | // Process first pixels of MCU's, which are full |
291 | 0 | for (int Row = 0; Row < 2; ++Row) |
292 | 0 | for (int Col = 0; Col < 2; ++Col) |
293 | 0 | MCUs[Row][Col][0][0].process(hue); |
294 | | |
295 | | // Interpolate the middle pixel of first row. |
296 | 0 | MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]); |
297 | | |
298 | | // Interpolate the first pixel of second row. |
299 | 0 | MCUs[0][0][1][0].interpolateCbCr(MCUs[0][0][0][0], MCUs[1][0][0][0]); |
300 | | |
301 | | // Interpolate the second pixel of second row. |
302 | 0 | MCUs[0][0][1][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0], |
303 | 0 | MCUs[1][0][0][0], MCUs[1][1][0][0]); |
304 | | |
305 | | // FIXME: we should instead simply interpolate odd pixels on even rows |
306 | | // and then even pixels on odd rows, as specified in the standard. |
307 | | // for (int Row = 0; Row < 2; ++Row) |
308 | | // MCUs[Row][0][0][1].interpolateCbCr(MCUs[Row][0][0][0], |
309 | | // MCUs[Row][1][0][0]); |
310 | | // for (int Col = 0; Col < 2; ++Col) |
311 | | // MCUs[0][0][1][Col].interpolateCbCr(MCUs[0][0][0][Col], |
312 | | // MCUs[1][0][0][Col]); |
313 | | |
314 | | // And finally, store the first MCU, i.e. first two pixels on two rows. |
315 | 0 | StoreMCU(MCUs[0][0], MCUIdx, row); |
316 | 0 | } |
317 | | |
318 | 0 | invariant(MCUIdx + 1 == numMCUs); |
319 | | |
320 | | // Last two pixels of the lines, the packed input format is: |
321 | | // p0 p1 p2 p3 p0 p0 |
322 | | // row 0: ... [ Y1 Y2 Y3 Y4 Cb Cr ] |
323 | | // row 1: ... [ Y1 Y2 Y3 Y4 Cb Cr ] |
324 | | // .. .. .. .. . . |
325 | | // in unpacked form that is: |
326 | | // p0 p1 |
327 | | // row 0: ... [ Y1 Cb Cr ] [ Y2 ... ... ] |
328 | | // row 1: ... [ Y3 ... ... ] [ Y4 ... ... ] |
329 | | // row 2: ... [ Y1 Cb Cr ] [ Y2 ... ... ] |
330 | | // row 3: ... [ Y3 ... ... ] [ Y4 ... ... ] |
331 | | // .. . . .. . . |
332 | | |
333 | 0 | std::array<MCUTy, 2> MCUs; |
334 | 0 | for (int Row = 0; Row < 2; ++Row) |
335 | 0 | MCUs[Row] = LoadMCU(row + Row, MCUIdx); |
336 | |
|
337 | 0 | for (int Row = 0; Row < 2; ++Row) |
338 | 0 | MCUs[Row][0][0].process(hue); |
339 | |
|
340 | 0 | MCUs[0][1][0].interpolateCbCr(MCUs[0][0][0], MCUs[1][0][0]); |
341 | |
|
342 | 0 | for (int Row = 0; Row < 2; ++Row) |
343 | 0 | YCbCr::CopyCbCr(&MCUs[0][Row][1], MCUs[0][Row][0]); |
344 | |
|
345 | 0 | StoreMCU(MCUs[0], MCUIdx, row); |
346 | 0 | } Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int) Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int) |
347 | | |
348 | 0 | template <int version> void Cr2sRawInterpolator::interpolate_420() { |
349 | 0 | const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef()); |
350 | |
|
351 | 0 | constexpr int X_S_F = 2; |
352 | 0 | constexpr int Y_S_F = 2; |
353 | 0 | constexpr int PixelsPerMCU = X_S_F * Y_S_F; |
354 | 0 | constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU; |
355 | |
|
356 | 0 | constexpr int YsPerMCU = PixelsPerMCU; |
357 | 0 | constexpr int ComponentsPerPixel = 3; |
358 | 0 | constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU; |
359 | |
|
360 | 0 | invariant(input.width() % InputComponentsPerMCU == 0); |
361 | 0 | int numMCUs = input.width() / InputComponentsPerMCU; |
362 | 0 | invariant(numMCUs > 1); |
363 | | |
364 | 0 | using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>; |
365 | |
|
366 | 0 | auto LoadMCU = [input_ = input](int Row, int MCUIdx) |
367 | 0 | __attribute__((always_inline)) { |
368 | 0 | MCUTy MCU; |
369 | 0 | for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) { |
370 | 0 | for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) { |
371 | 0 | YCbCr::LoadY(&MCU[MCURow][MCUCol], |
372 | 0 | input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + |
373 | 0 | X_S_F * MCURow + MCUCol, |
374 | 0 | 1)); |
375 | 0 | } |
376 | 0 | } |
377 | 0 | YCbCr::LoadCbCr( |
378 | 0 | &MCU[0][0], |
379 | 0 | input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + YsPerMCU, 2)); |
380 | 0 | return MCU; |
381 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<1>()::{lambda(int, int)#1}::operator()(int, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<2>()::{lambda(int, int)#1}::operator()(int, int) const |
382 | 0 | auto StoreMCU = [ this, out ](const MCUTy& MCU, int MCUIdx, int Row) |
383 | 0 | __attribute__((always_inline)) { |
384 | 0 | for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) { |
385 | 0 | for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) { |
386 | 0 | YUV_TO_RGB<version>(MCU[MCURow][MCUCol], |
387 | 0 | out[2 * Row + MCURow].getCrop( |
388 | 0 | ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) + |
389 | 0 | ComponentsPerPixel * MCUCol, |
390 | 0 | 3)); |
391 | 0 | } |
392 | 0 | } |
393 | 0 | }; Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<1>()::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<2>()::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const |
394 | |
|
395 | 0 | int row = 0; |
396 | 0 | #ifdef HAVE_OPENMP |
397 | 0 | #pragma omp parallel for default(none) schedule(static) \ |
398 | 0 | num_threads(rawspeed_get_number_of_processor_cores()) firstprivate(out) \ |
399 | 0 | lastprivate(row) |
400 | 0 | #endif |
401 | 0 | for (row = 0; row < input.height() - 1; ++row) |
402 | 0 | interpolate_420_row<version>(row); Unexecuted instantiation: Cr2sRawInterpolator.cpp:void rawspeed::Cr2sRawInterpolator::interpolate_420<1>() [clone .omp_outlined_debug__] Unexecuted instantiation: Cr2sRawInterpolator.cpp:void rawspeed::Cr2sRawInterpolator::interpolate_420<2>() [clone .omp_outlined_debug__] |
403 | |
|
404 | 0 | invariant(row + 1 == input.height()); |
405 | | |
406 | | // Last two lines, the packed input format is: |
407 | | // p0 p1 p2 p3 p0 p0 p4 p5 p6 p7 p4 p4 |
408 | | // .. .. .. .. . . .. .. .. .. . . |
409 | | // row 0: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ... |
410 | | // in unpacked form that is: |
411 | | // p0 p1 p2 p3 |
412 | | // .. . . .. . . .. . . .. . . |
413 | | // row 0: [ Y1 Cb Cr ] [ Y2 ... ... ] [ Y1 Cb Cr ] [ Y2 ... ... ] ... |
414 | | // row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ... |
415 | | |
416 | 0 | int MCUIdx; |
417 | 0 | for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) { |
418 | 0 | invariant(MCUIdx + 1 < numMCUs); |
419 | | |
420 | | // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need |
421 | | // interpolation, so we need to load eight pixels, |
422 | | // and thus we must load 4 MCU's. |
423 | 0 | std::array<std::array<MCUTy, 2>, 1> MCUs; |
424 | 0 | for (int Row = 0; Row < 1; ++Row) |
425 | 0 | for (int Col = 0; Col < 2; ++Col) |
426 | 0 | MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col); |
427 | | |
428 | | // Process first pixels of MCU's, which are full |
429 | 0 | for (int Row = 0; Row < 1; ++Row) |
430 | 0 | for (int Col = 0; Col < 2; ++Col) |
431 | 0 | MCUs[Row][Col][0][0].process(hue); |
432 | | |
433 | | // Interpolate the middle pixel of first row. |
434 | 0 | MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]); |
435 | | |
436 | | // Copy Cb/Cr to the first two pixels of second row from the two pixels |
437 | | // of first row. |
438 | 0 | for (int Col = 0; Col < 2; ++Col) |
439 | 0 | YCbCr::CopyCbCr(&MCUs[0][0][1][Col], MCUs[0][0][0][Col]); |
440 | | |
441 | | // And finally, store the first MCU, i.e. first two pixels on two rows. |
442 | 0 | StoreMCU(MCUs[0][0], MCUIdx, row); |
443 | 0 | } |
444 | | |
445 | 0 | invariant(MCUIdx + 1 == numMCUs); |
446 | | |
447 | | // Last two pixels of last two lines, the packed input format is: |
448 | | // p0 p1 p2 p3 p0 p0 |
449 | | // .. .. .. .. . . |
450 | | // row 0: ... [ Y1 Y2 Y3 Y4 Cb Cr ] |
451 | | // in unpacked form that is: |
452 | | // p0 p1 |
453 | | // .. . . .. . . |
454 | | // row 0: ... [ Y1 Cb Cr ] [ Y2 ... ... ] |
455 | | // row 1: ... [ Y3 ... ... ] [ Y4 ... ... ] |
456 | | |
457 | 0 | MCUTy MCU = LoadMCU(row, MCUIdx); |
458 | |
|
459 | 0 | MCU[0][0].process(hue); |
460 | | |
461 | | // Distribute the same Cb/Cr to all four pixels. |
462 | 0 | for (int Row = 0; Row < 2; ++Row) |
463 | 0 | for (int Col = 0; Col < 2; ++Col) |
464 | 0 | YCbCr::CopyCbCr(&MCU[Row][Col], MCU[0][0]); |
465 | |
|
466 | 0 | StoreMCU(MCU, MCUIdx, row); |
467 | 0 | } Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420<1>() Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420<2>() |
468 | | |
469 | | inline void Cr2sRawInterpolator::STORE_RGB(CroppedArray1DRef<uint16_t> out, |
470 | 0 | int r, int g, int b) { |
471 | 0 | invariant(out.size() == 3); |
472 | 0 | out(0) = clampBits(r >> 8, 16); |
473 | 0 | out(1) = clampBits(g >> 8, 16); |
474 | 0 | out(2) = clampBits(b >> 8, 16); |
475 | 0 | } |
476 | | |
477 | | template </* int version */> |
478 | | /* Algorithm found in EOS 40D */ |
479 | | inline void |
480 | | Cr2sRawInterpolator::YUV_TO_RGB<0>(const YCbCr& p, |
481 | 0 | CroppedArray1DRef<uint16_t> out) { |
482 | 0 | int r = sraw_coeffs[0] * (p.Y + p.Cr - 512); |
483 | 0 | int g = sraw_coeffs[1] * (p.Y + ((-778 * p.Cb - (p.Cr * 2048)) >> 12) - 512); |
484 | 0 | int b = sraw_coeffs[2] * (p.Y + (p.Cb - 512)); |
485 | 0 | STORE_RGB(out, r, g, b); |
486 | 0 | } |
487 | | |
488 | | template </* int version */> |
489 | | inline void |
490 | | Cr2sRawInterpolator::YUV_TO_RGB<1>(const YCbCr& p, |
491 | 0 | CroppedArray1DRef<uint16_t> out) { |
492 | 0 | int r = sraw_coeffs[0] * (p.Y + ((50 * p.Cb + 22929 * p.Cr) >> 12)); |
493 | 0 | int g = sraw_coeffs[1] * (p.Y + ((-5640 * p.Cb - 11751 * p.Cr) >> 12)); |
494 | 0 | int b = sraw_coeffs[2] * (p.Y + ((29040 * p.Cb - 101 * p.Cr) >> 12)); |
495 | 0 | STORE_RGB(out, r, g, b); |
496 | 0 | } |
497 | | |
498 | | template </* int version */> |
499 | | /* Algorithm found in EOS 5d Mk III */ |
500 | | inline void |
501 | | Cr2sRawInterpolator::YUV_TO_RGB<2>(const YCbCr& p, |
502 | 0 | CroppedArray1DRef<uint16_t> out) { |
503 | 0 | int r = sraw_coeffs[0] * (p.Y + p.Cr); |
504 | 0 | int g = sraw_coeffs[1] * (p.Y + ((-778 * p.Cb - (p.Cr * 2048)) >> 12)); |
505 | 0 | int b = sraw_coeffs[2] * (p.Y + p.Cb); |
506 | 0 | STORE_RGB(out, r, g, b); |
507 | 0 | } |
508 | | |
509 | | // Interpolate and convert sRaw data. |
510 | 0 | void Cr2sRawInterpolator::interpolate(int version) { |
511 | 0 | invariant(version >= 0 && version <= 2); |
512 | | |
513 | 0 | const auto& subSampling = mRaw->metadata.subsampling; |
514 | 0 | if (subSampling.y == 1 && subSampling.x == 2) { |
515 | 0 | switch (version) { |
516 | 0 | case 0: |
517 | 0 | interpolate_422<0>(); |
518 | 0 | break; |
519 | 0 | case 1: |
520 | 0 | interpolate_422<1>(); |
521 | 0 | break; |
522 | 0 | case 2: |
523 | 0 | interpolate_422<2>(); |
524 | 0 | break; |
525 | 0 | default: |
526 | 0 | __builtin_unreachable(); |
527 | 0 | } |
528 | 0 | } else if (subSampling.y == 2 && subSampling.x == 2) { |
529 | 0 | switch (version) { |
530 | | // no known sraws with "version 0" |
531 | 0 | case 1: |
532 | 0 | interpolate_420<1>(); |
533 | 0 | break; |
534 | 0 | case 2: |
535 | 0 | interpolate_420<2>(); |
536 | 0 | break; |
537 | 0 | default: |
538 | 0 | __builtin_unreachable(); |
539 | 0 | } |
540 | 0 | } else |
541 | 0 | ThrowRDE("Unknown subsampling: (%i; %i)", subSampling.x, subSampling.y); |
542 | 0 | } |
543 | | |
544 | | } // namespace rawspeed |