Coverage Report

Created: 2024-09-08 06:18

/src/librawspeed/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
    RawSpeed - RAW file decoder.
3
4
    Copyright (C) 2009-2014 Klaus Post
5
    Copyright (C) 2015-2017 Roman Lebedev
6
7
    This library is free software; you can redistribute it and/or
8
    modify it under the terms of the GNU Lesser General Public
9
    License as published by the Free Software Foundation; either
10
    version 2 of the License, or (at your option) any later version.
11
12
    This library is distributed in the hope that it will be useful,
13
    but WITHOUT ANY WARRANTY; without even the implied warranty of
14
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
    Lesser General Public License for more details.
16
17
    You should have received a copy of the GNU Lesser General Public
18
    License along with this library; if not, write to the Free Software
19
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
*/
21
22
#include "rawspeedconfig.h"
23
#include "interpolators/Cr2sRawInterpolator.h"
24
#include "adt/Array2DRef.h"
25
#include "adt/Bit.h"
26
#include "adt/CroppedArray1DRef.h"
27
#include "adt/Invariant.h"
28
#include "adt/Point.h"
29
#include "common/Common.h"
30
#include "common/RawImage.h"
31
#include "decoders/RawDecoderException.h"
32
#include <array>
33
#include <cstdint>
34
35
namespace rawspeed {
36
37
struct Cr2sRawInterpolator::YCbCr final {
38
  int Y = 0;
39
  int Cb = 0;
40
  int Cr = 0;
41
42
0
  static void LoadY(YCbCr* p, const CroppedArray1DRef<const uint16_t> in) {
43
0
    invariant(p);
44
0
    invariant(in.size() == 1);
45
46
0
    p->Y = in(0);
47
0
  }
48
49
0
  static void LoadCbCr(YCbCr* p, const CroppedArray1DRef<const uint16_t> in) {
50
0
    invariant(p);
51
0
    invariant(in.size() == 2);
52
53
0
    p->Cb = in(0);
54
0
    p->Cr = in(1);
55
0
  }
56
57
0
  static void CopyCbCr(YCbCr* p, const YCbCr& pSrc) {
58
0
    invariant(p);
59
60
0
    p->Cb = pSrc.Cb;
61
0
    p->Cr = pSrc.Cr;
62
0
  }
63
64
0
  YCbCr() = default;
65
66
0
  void signExtend() {
67
0
    Cb -= 16384;
68
0
    Cr -= 16384;
69
0
  }
70
71
0
  void applyHue(int hue_) {
72
0
    Cb += hue_;
73
0
    Cr += hue_;
74
0
  }
75
76
0
  void process(int hue_) {
77
0
    signExtend();
78
0
    applyHue(hue_);
79
0
  }
80
81
0
  void interpolateCbCr(const YCbCr& p0, const YCbCr& p2) {
82
    // Y is already good, need to interpolate Cb and Cr
83
    // FIXME: dcraw does +1 before >> 1
84
0
    Cb = (p0.Cb + p2.Cb) >> 1;
85
0
    Cr = (p0.Cr + p2.Cr) >> 1;
86
0
  }
87
88
  void interpolateCbCr(const YCbCr& p0, const YCbCr& p1, const YCbCr& p2,
89
0
                       const YCbCr& p3) {
90
    // Y is already good, need to interpolate Cb and Cr
91
    // FIXME: dcraw does +1 before >> 1
92
0
    Cb = (p0.Cb + p1.Cb + p2.Cb + p3.Cb) >> 2;
93
0
    Cr = (p0.Cr + p1.Cr + p2.Cr + p3.Cr) >> 2;
94
0
  }
95
};
96
97
0
template <int version> void Cr2sRawInterpolator::interpolate_422_row(int row) {
98
0
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
99
100
0
  constexpr int InputComponentsPerMCU = 4;
101
0
  constexpr int PixelsPerMCU = 2;
102
0
  constexpr int YsPerMCU = PixelsPerMCU;
103
0
  constexpr int ComponentsPerPixel = 3;
104
0
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
105
106
0
  invariant(input.width() % InputComponentsPerMCU == 0);
107
0
  int numMCUs = input.width() / InputComponentsPerMCU;
108
0
  invariant(numMCUs > 1);
109
110
0
  using MCUTy = std::array<YCbCr, PixelsPerMCU>;
111
112
0
  auto LoadMCU = [input_ = input, row](int MCUIdx) {
113
0
    MCUTy MCU;
114
0
    for (int YIdx = 0; YIdx < PixelsPerMCU; ++YIdx)
115
0
      YCbCr::LoadY(&MCU[YIdx], input_[row].getCrop(
116
0
                                   InputComponentsPerMCU * MCUIdx + YIdx, 1));
117
0
    YCbCr::LoadCbCr(&MCU[0], input_[row].getCrop(
118
0
                                 InputComponentsPerMCU * MCUIdx + YsPerMCU, 2));
119
0
    return MCU;
120
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int)::{lambda(int)#1}::operator()(int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int)::{lambda(int)#1}::operator()(int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int)::{lambda(int)#1}::operator()(int) const
121
0
  auto StoreMCU = [this, out, row](const MCUTy& MCU, int MCUIdx) {
122
0
    for (int Pixel = 0; Pixel < PixelsPerMCU; ++Pixel) {
123
0
      YUV_TO_RGB<version>(MCU[Pixel],
124
0
                          out[row].getCrop(OutputComponentsPerMCU * MCUIdx +
125
0
                                               ComponentsPerPixel * Pixel,
126
0
                                           3));
127
0
    }
128
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int)::{lambda(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int)#1}::operator()(std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul> const&, int) const
129
130
  // The packed input format is:
131
  //   p0 p1 p0 p0     p2 p3 p2 p2
132
  //  [ Y1 Y2 Cb Cr ] [ Y1 Y2 Cb Cr ] ...
133
  // in unpacked form that is:
134
  //   p0             p1             p2             p3
135
  //  [ Y1 Cb  Cr  ] [ Y2 ... ... ] [ Y1 Cb  Cr  ] [ Y2 ... ... ] ...
136
  // i.e. even pixels are full, odd pixels need interpolation:
137
  //   p0             p1             p2             p3
138
  //  [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
139
  // for last (odd) pixel of the line,  just keep Cb/Cr from previous pixel
140
  // see http://lclevy.free.fr/cr2/#sraw
141
142
0
  int MCUIdx;
143
  // Process all MCU's except the last one.
144
0
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
145
0
    invariant(MCUIdx + 1 <= numMCUs);
146
147
    // For 4:2:2, one MCU encodes 2 pixels, and odd pixels need interpolation,
148
    // so we need to load three pixels, and thus we must load 2 MCU's.
149
0
    std::array<MCUTy, 2> MCUs;
150
0
    for (int SubMCUIdx = 0; static_cast<unsigned>(SubMCUIdx) < MCUs.size();
151
0
         ++SubMCUIdx)
152
0
      MCUs[SubMCUIdx] = LoadMCU(MCUIdx + SubMCUIdx);
153
154
    // Process first pixel, which is full
155
0
    MCUs[0][0].process(hue);
156
    // Process third pixel, which is, again, full
157
0
    MCUs[1][0].process(hue);
158
    // Interpolate the middle pixel, for which only the Y was known.
159
0
    MCUs[0][1].interpolateCbCr(MCUs[0][0], MCUs[1][0]);
160
161
    // And finally, store the first MCU, i.e. first two pixels.
162
0
    StoreMCU(MCUs[0], MCUIdx);
163
0
  }
164
165
0
  invariant(MCUIdx + 1 == numMCUs);
166
167
  // Last two pixels, the packed input format is:
168
  //      p0 p1 p0 p0
169
  //  .. [ Y1 Y2 Cb Cr ]
170
  // in unpacked form that is:
171
  //      p0             p1
172
  //  .. [ Y1 Cb  Cr  ] [ Y2 ... ... ]
173
174
0
  MCUTy MCU = LoadMCU(MCUIdx);
175
176
0
  MCU[0].process(hue);
177
0
  YCbCr::CopyCbCr(&MCU[1], MCU[0]);
178
179
0
  StoreMCU(MCU, MCUIdx);
180
0
}
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<0>(int)
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<1>(int)
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422_row<2>(int)
181
182
0
template <int version> void Cr2sRawInterpolator::interpolate_422() {
183
0
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
184
0
  invariant(out.width() > 0);
185
0
  invariant(out.height() > 0);
186
187
  // Benchmarking suggests that for real-world usage, it is not beneficial to
188
  // parallelize this, and in fact leads to worse performance.
189
0
  for (int row = 0; row < out.height(); row++)
190
0
    interpolate_422_row<version>(row);
191
0
}
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<0>()
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<1>()
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_422<2>()
192
193
0
template <int version> void Cr2sRawInterpolator::interpolate_420_row(int row) {
194
0
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
195
196
0
  constexpr int X_S_F = 2;
197
0
  constexpr int Y_S_F = 2;
198
0
  constexpr int PixelsPerMCU = X_S_F * Y_S_F;
199
0
  constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
200
201
0
  constexpr int YsPerMCU = PixelsPerMCU;
202
0
  constexpr int ComponentsPerPixel = 3;
203
0
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
204
205
0
  invariant(input.width() % InputComponentsPerMCU == 0);
206
0
  int numMCUs = input.width() / InputComponentsPerMCU;
207
0
  invariant(numMCUs > 1);
208
209
0
  using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
210
211
0
  auto LoadMCU = [input_ = input](int Row, int MCUIdx)
212
0
      __attribute__((always_inline)) {
213
0
    MCUTy MCU;
214
0
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
215
0
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
216
0
        YCbCr::LoadY(&MCU[MCURow][MCUCol],
217
0
                     input_[Row].getCrop(InputComponentsPerMCU * MCUIdx +
218
0
                                             X_S_F * MCURow + MCUCol,
219
0
                                         1));
220
0
      }
221
0
    }
222
0
    YCbCr::LoadCbCr(
223
0
        &MCU[0][0],
224
0
        input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + YsPerMCU, 2));
225
0
    return MCU;
226
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int)::{lambda(int, int)#1}::operator()(int, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int)::{lambda(int, int)#1}::operator()(int, int) const
227
0
  auto StoreMCU = [ this, out ](const MCUTy& MCU, int MCUIdx, int Row)
228
0
      __attribute__((always_inline)) {
229
0
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
230
0
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
231
0
        YUV_TO_RGB<version>(MCU[MCURow][MCUCol],
232
0
                            out[2 * Row + MCURow].getCrop(
233
0
                                ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
234
0
                                    ComponentsPerPixel * MCUCol,
235
0
                                3));
236
0
      }
237
0
    }
238
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int)::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int)::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const
239
240
0
  invariant(row + 1 <= input.height());
241
242
  // The packed input format is:
243
  //          p0 p1 p2 p3 p0 p0     p4 p5 p6 p7 p4 p4
244
  //  row 0: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ...
245
  //  row 1: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ...
246
  //           .. .. .. .. .  .      .. .. .. .. .  .
247
  // in unpacked form that is:
248
  //          p0             p1             p2             p3
249
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 ... ... ] [ Y1 Cb  Cr  ] [ Y2 ... ... ] ...
250
  //  row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
251
  //  row 2: [ Y1 Cb  Cr  ] [ Y2 ... ... ] [ Y1 Cb  Cr  ] [ Y2 ... ... ] ...
252
  //  row 3: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
253
  //           .. .   .       .. .   .       .. .   .       .. .   .
254
  // i.e. on even rows, even pixels are full, rest of pixels need interpolation
255
  // first, on even rows, odd pixels are interpolated using 422 algo (marked *)
256
  //          p0             p1             p2             p3
257
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
258
  //  row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
259
  //  row 2: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
260
  //  row 3: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
261
  //           .. .   .       .. .   .       .. .   .
262
  // then,  on odd rows, even pixels are interpolated (marked with #)
263
  //          p0             p1             p2             p3
264
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
265
  //  row 1: [ Y3 Cb# Cr# ] [ Y4 ... ... ] [ Y3 Cb# Cr# ] [ Y4 ... ... ] ...
266
  //  row 2: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
267
  //  row 3: [ Y3 Cb# Cr# ] [ Y4 ... ... ] [ Y3 Cb# Cr# ] [ Y4 ... ... ] ...
268
  //           .. .   .       .. .   .       .. .   .
269
  // and finally, on odd rows, odd pixels are interpolated from * (marked $)
270
  //          p0             p1             p2             p3
271
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
272
  //  row 1: [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] ...
273
  //  row 2: [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] [ Y1 Cb  Cr  ] [ Y2 Cb* Cr* ] ...
274
  //  row 3: [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] [ Y3 Cb# Cr# ] [ Y4 Cb$ Cr$ ] ...
275
  //           .. .   .       .. .   .       .. .   .
276
  // see http://lclevy.free.fr/cr2/#sraw
277
278
0
  int MCUIdx;
279
0
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
280
0
    invariant(MCUIdx + 1 <= numMCUs);
281
282
    // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need
283
    // interpolation, so we need to load eight pixels,
284
    // and thus we must load 4 MCU's.
285
0
    std::array<std::array<MCUTy, 2>, 2> MCUs;
286
0
    for (int Row = 0; Row < 2; ++Row)
287
0
      for (int Col = 0; Col < 2; ++Col)
288
0
        MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
289
290
    // Process first pixels of MCU's, which are full
291
0
    for (int Row = 0; Row < 2; ++Row)
292
0
      for (int Col = 0; Col < 2; ++Col)
293
0
        MCUs[Row][Col][0][0].process(hue);
294
295
    // Interpolate the middle pixel of first row.
296
0
    MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
297
298
    // Interpolate the first pixel of second row.
299
0
    MCUs[0][0][1][0].interpolateCbCr(MCUs[0][0][0][0], MCUs[1][0][0][0]);
300
301
    // Interpolate the second pixel of second row.
302
0
    MCUs[0][0][1][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0],
303
0
                                     MCUs[1][0][0][0], MCUs[1][1][0][0]);
304
305
    // FIXME: we should instead simply interpolate odd pixels on even rows
306
    //        and then even pixels on odd rows, as specified in the standard.
307
    // for (int Row = 0; Row < 2; ++Row)
308
    //   MCUs[Row][0][0][1].interpolateCbCr(MCUs[Row][0][0][0],
309
    //                                      MCUs[Row][1][0][0]);
310
    // for (int Col = 0; Col < 2; ++Col)
311
    //   MCUs[0][0][1][Col].interpolateCbCr(MCUs[0][0][0][Col],
312
    //                                      MCUs[1][0][0][Col]);
313
314
    // And finally, store the first MCU, i.e. first two pixels on two rows.
315
0
    StoreMCU(MCUs[0][0], MCUIdx, row);
316
0
  }
317
318
0
  invariant(MCUIdx + 1 == numMCUs);
319
320
  // Last two pixels of the lines, the packed input format is:
321
  //              p0 p1 p2 p3 p0 p0
322
  //  row 0: ... [ Y1 Y2 Y3 Y4 Cb Cr ]
323
  //  row 1: ... [ Y1 Y2 Y3 Y4 Cb Cr ]
324
  //               .. .. .. .. .  .
325
  // in unpacked form that is:
326
  //              p0             p1
327
  //  row 0: ... [ Y1 Cb  Cr  ] [ Y2 ... ... ]
328
  //  row 1: ... [ Y3 ... ... ] [ Y4 ... ... ]
329
  //  row 2: ... [ Y1 Cb  Cr  ] [ Y2 ... ... ]
330
  //  row 3: ... [ Y3 ... ... ] [ Y4 ... ... ]
331
  //               .. .   .       .. .   .
332
333
0
  std::array<MCUTy, 2> MCUs;
334
0
  for (int Row = 0; Row < 2; ++Row)
335
0
    MCUs[Row] = LoadMCU(row + Row, MCUIdx);
336
337
0
  for (int Row = 0; Row < 2; ++Row)
338
0
    MCUs[Row][0][0].process(hue);
339
340
0
  MCUs[0][1][0].interpolateCbCr(MCUs[0][0][0], MCUs[1][0][0]);
341
342
0
  for (int Row = 0; Row < 2; ++Row)
343
0
    YCbCr::CopyCbCr(&MCUs[0][Row][1], MCUs[0][Row][0]);
344
345
0
  StoreMCU(MCUs[0], MCUIdx, row);
346
0
}
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420_row<1>(int)
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420_row<2>(int)
347
348
0
template <int version> void Cr2sRawInterpolator::interpolate_420() {
349
0
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
350
351
0
  constexpr int X_S_F = 2;
352
0
  constexpr int Y_S_F = 2;
353
0
  constexpr int PixelsPerMCU = X_S_F * Y_S_F;
354
0
  constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
355
356
0
  constexpr int YsPerMCU = PixelsPerMCU;
357
0
  constexpr int ComponentsPerPixel = 3;
358
0
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
359
360
0
  invariant(input.width() % InputComponentsPerMCU == 0);
361
0
  int numMCUs = input.width() / InputComponentsPerMCU;
362
0
  invariant(numMCUs > 1);
363
364
0
  using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
365
366
0
  auto LoadMCU = [input_ = input](int Row, int MCUIdx)
367
0
      __attribute__((always_inline)) {
368
0
    MCUTy MCU;
369
0
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
370
0
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
371
0
        YCbCr::LoadY(&MCU[MCURow][MCUCol],
372
0
                     input_[Row].getCrop(InputComponentsPerMCU * MCUIdx +
373
0
                                             X_S_F * MCURow + MCUCol,
374
0
                                         1));
375
0
      }
376
0
    }
377
0
    YCbCr::LoadCbCr(
378
0
        &MCU[0][0],
379
0
        input_[Row].getCrop(InputComponentsPerMCU * MCUIdx + YsPerMCU, 2));
380
0
    return MCU;
381
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<1>()::{lambda(int, int)#1}::operator()(int, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<2>()::{lambda(int, int)#1}::operator()(int, int) const
382
0
  auto StoreMCU = [ this, out ](const MCUTy& MCU, int MCUIdx, int Row)
383
0
      __attribute__((always_inline)) {
384
0
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
385
0
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
386
0
        YUV_TO_RGB<version>(MCU[MCURow][MCUCol],
387
0
                            out[2 * Row + MCURow].getCrop(
388
0
                                ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
389
0
                                    ComponentsPerPixel * MCUCol,
390
0
                                3));
391
0
      }
392
0
    }
393
0
  };
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<1>()::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const
Unexecuted instantiation: rawspeed::Cr2sRawInterpolator::interpolate_420<2>()::{lambda(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int)#1}::operator()(std::__1::array<std::__1::array<rawspeed::Cr2sRawInterpolator::YCbCr, 2ul>, 2ul> const&, int, int) const
394
395
0
  int row = 0;
396
0
#ifdef HAVE_OPENMP
397
0
#pragma omp parallel for default(none) schedule(static)                        \
398
0
    num_threads(rawspeed_get_number_of_processor_cores()) firstprivate(out)    \
399
0
    lastprivate(row)
400
0
#endif
401
0
  for (row = 0; row < input.height() - 1; ++row)
402
0
    interpolate_420_row<version>(row);
Unexecuted instantiation: Cr2sRawInterpolator.cpp:void rawspeed::Cr2sRawInterpolator::interpolate_420<1>() [clone .omp_outlined_debug__]
Unexecuted instantiation: Cr2sRawInterpolator.cpp:void rawspeed::Cr2sRawInterpolator::interpolate_420<2>() [clone .omp_outlined_debug__]
403
404
0
  invariant(row + 1 == input.height());
405
406
  // Last two lines, the packed input format is:
407
  //          p0 p1 p2 p3 p0 p0     p4 p5 p6 p7 p4 p4
408
  //           .. .. .. .. .  .      .. .. .. .. .  .
409
  //  row 0: [ Y1 Y2 Y3 Y4 Cb Cr ] [ Y1 Y2 Y3 Y4 Cb Cr ] ...
410
  // in unpacked form that is:
411
  //          p0             p1             p2             p3
412
  //           .. .   .       .. .   .       .. .   .       .. .   .
413
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 ... ... ] [ Y1 Cb  Cr  ] [ Y2 ... ... ] ...
414
  //  row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
415
416
0
  int MCUIdx;
417
0
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
418
0
    invariant(MCUIdx + 1 < numMCUs);
419
420
    // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need
421
    // interpolation, so we need to load eight pixels,
422
    // and thus we must load 4 MCU's.
423
0
    std::array<std::array<MCUTy, 2>, 1> MCUs;
424
0
    for (int Row = 0; Row < 1; ++Row)
425
0
      for (int Col = 0; Col < 2; ++Col)
426
0
        MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
427
428
    // Process first pixels of MCU's, which are full
429
0
    for (int Row = 0; Row < 1; ++Row)
430
0
      for (int Col = 0; Col < 2; ++Col)
431
0
        MCUs[Row][Col][0][0].process(hue);
432
433
    // Interpolate the middle pixel of first row.
434
0
    MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
435
436
    // Copy Cb/Cr to the first two pixels of second row from the two pixels
437
    // of first row.
438
0
    for (int Col = 0; Col < 2; ++Col)
439
0
      YCbCr::CopyCbCr(&MCUs[0][0][1][Col], MCUs[0][0][0][Col]);
440
441
    // And finally, store the first MCU, i.e. first two pixels on two rows.
442
0
    StoreMCU(MCUs[0][0], MCUIdx, row);
443
0
  }
444
445
0
  invariant(MCUIdx + 1 == numMCUs);
446
447
  // Last two pixels of last two lines, the packed input format is:
448
  //              p0 p1 p2 p3 p0 p0
449
  //               .. .. .. .. .  .
450
  //  row 0: ... [ Y1 Y2 Y3 Y4 Cb Cr ]
451
  // in unpacked form that is:
452
  //               p0             p1
453
  //                .. .   .       .. .   .
454
  //  row 0:  ... [ Y1 Cb  Cr  ] [ Y2 ... ... ]
455
  //  row 1:  ... [ Y3 ... ... ] [ Y4 ... ... ]
456
457
0
  MCUTy MCU = LoadMCU(row, MCUIdx);
458
459
0
  MCU[0][0].process(hue);
460
461
  // Distribute the same Cb/Cr to all four pixels.
462
0
  for (int Row = 0; Row < 2; ++Row)
463
0
    for (int Col = 0; Col < 2; ++Col)
464
0
      YCbCr::CopyCbCr(&MCU[Row][Col], MCU[0][0]);
465
466
0
  StoreMCU(MCU, MCUIdx, row);
467
0
}
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420<1>()
Unexecuted instantiation: void rawspeed::Cr2sRawInterpolator::interpolate_420<2>()
468
469
// Writes one RGB triplet into the three-element output span `out`.
// Each of `r`, `g`, `b` is shifted right by 8 and then passed through
// clampBits(..., 16) before being stored.
inline void Cr2sRawInterpolator::STORE_RGB(CroppedArray1DRef<uint16_t> out,
                                           int r, int g, int b) {
  invariant(out.size() == 3);

  const std::array<int, 3> channels = {r, g, b};
  for (int c = 0; c != 3; ++c)
    out(c) = clampBits(channels[c] >> 8, 16);
}
476
477
template </* int version */>
478
/* Algorithm found in EOS 40D */
479
inline void
480
Cr2sRawInterpolator::YUV_TO_RGB<0>(const YCbCr& p,
481
0
                                   CroppedArray1DRef<uint16_t> out) {
482
0
  int r = sraw_coeffs[0] * (p.Y + p.Cr - 512);
483
0
  int g = sraw_coeffs[1] * (p.Y + ((-778 * p.Cb - (p.Cr * 2048)) >> 12) - 512);
484
0
  int b = sraw_coeffs[2] * (p.Y + (p.Cb - 512));
485
0
  STORE_RGB(out, r, g, b);
486
0
}
487
488
template </* int version */>
489
inline void
490
Cr2sRawInterpolator::YUV_TO_RGB<1>(const YCbCr& p,
491
0
                                   CroppedArray1DRef<uint16_t> out) {
492
0
  int r = sraw_coeffs[0] * (p.Y + ((50 * p.Cb + 22929 * p.Cr) >> 12));
493
0
  int g = sraw_coeffs[1] * (p.Y + ((-5640 * p.Cb - 11751 * p.Cr) >> 12));
494
0
  int b = sraw_coeffs[2] * (p.Y + ((29040 * p.Cb - 101 * p.Cr) >> 12));
495
0
  STORE_RGB(out, r, g, b);
496
0
}
497
498
template </* int version */>
499
/* Algorithm found in EOS 5d Mk III */
500
inline void
501
Cr2sRawInterpolator::YUV_TO_RGB<2>(const YCbCr& p,
502
0
                                   CroppedArray1DRef<uint16_t> out) {
503
0
  int r = sraw_coeffs[0] * (p.Y + p.Cr);
504
0
  int g = sraw_coeffs[1] * (p.Y + ((-778 * p.Cb - (p.Cr * 2048)) >> 12));
505
0
  int b = sraw_coeffs[2] * (p.Y + p.Cb);
506
0
  STORE_RGB(out, r, g, b);
507
0
}
508
509
// Interpolate and convert sRaw data.
510
0
void Cr2sRawInterpolator::interpolate(int version) {
511
0
  invariant(version >= 0 && version <= 2);
512
513
0
  const auto& subSampling = mRaw->metadata.subsampling;
514
0
  if (subSampling.y == 1 && subSampling.x == 2) {
515
0
    switch (version) {
516
0
    case 0:
517
0
      interpolate_422<0>();
518
0
      break;
519
0
    case 1:
520
0
      interpolate_422<1>();
521
0
      break;
522
0
    case 2:
523
0
      interpolate_422<2>();
524
0
      break;
525
0
    default:
526
0
      __builtin_unreachable();
527
0
    }
528
0
  } else if (subSampling.y == 2 && subSampling.x == 2) {
529
0
    switch (version) {
530
    // no known sraws with "version 0"
531
0
    case 1:
532
0
      interpolate_420<1>();
533
0
      break;
534
0
    case 2:
535
0
      interpolate_420<2>();
536
0
      break;
537
0
    default:
538
0
      __builtin_unreachable();
539
0
    }
540
0
  } else
541
0
    ThrowRDE("Unknown subsampling: (%i; %i)", subSampling.x, subSampling.y);
542
0
}
543
544
} // namespace rawspeed