Coverage Report

Created: 2024-09-08 06:18

/src/librawspeed/src/librawspeed/common/RawImageDataU16.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
    RawSpeed - RAW file decoder.
3
4
    Copyright (C) 2009-2014 Klaus Post
5
6
    This library is free software; you can redistribute it and/or
7
    modify it under the terms of the GNU Lesser General Public
8
    License as published by the Free Software Foundation; either
9
    version 2 of the License, or (at your option) any later version.
10
11
    This library is distributed in the hope that it will be useful,
12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
    Lesser General Public License for more details.
15
16
    You should have received a copy of the GNU Lesser General Public
17
    License along with this library; if not, write to the Free Software
18
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
*/
20
21
#include "rawspeedconfig.h"
22
#include "common/RawImage.h"
23
#include "adt/Array1DRef.h"
24
#include "adt/Array2DRef.h"
25
#include "adt/Bit.h"
26
#include "adt/Casts.h"
27
#include "adt/CroppedArray2DRef.h"
28
#include "adt/Point.h"
29
#include "common/Common.h"
30
#include "common/TableLookUp.h"
31
#include "decoders/RawDecoderException.h"
32
#include "metadata/BlackArea.h"
33
#include <algorithm>
34
#include <array>
35
#include <cassert>
36
#include <cstdint>
37
#include <memory>
38
#include <vector>
39
40
#ifdef WITH_SSE2
41
#include "common/CpuFeatures.h"
42
#include <emmintrin.h>
43
#endif
44
45
using std::array;
46
using std::max;
47
using std::min;
48
using std::vector;
49
50
namespace rawspeed {
51
52
113k
RawImageDataU16::RawImageDataU16() {
53
113k
  dataType = RawImageType::UINT16;
54
113k
  bpp = sizeof(uint16_t);
55
113k
}
56
57
RawImageDataU16::RawImageDataU16(const iPoint2D& _dim, uint32_t _cpp)
58
0
    : RawImageData(RawImageType::UINT16, _dim, sizeof(uint16_t), _cpp) {}
59
60
0
void RawImageDataU16::calculateBlackAreas() {
61
0
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
62
63
0
  std::vector<uint16_t> histogramStorage;
64
0
  auto histogram = Array2DRef<uint16_t>::create(histogramStorage, 65536, 4);
65
66
0
  for (int row = 0; row != histogram.height(); ++row) {
67
0
    for (int col = 0; col != histogram.width(); ++col) {
68
0
      histogram(row, col) = 0;
69
0
    }
70
0
  }
71
72
0
  int totalpixels = 0;
73
74
0
  for (auto area : blackAreas) {
75
    /* Make sure area sizes are multiple of two,
76
       so we have the same amount of pixels for each CFA group */
77
0
    area.size = area.size - (area.size & 1);
78
79
    /* Process horizontal area */
80
0
    if (!area.isVertical) {
81
0
      if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
82
0
          uncropped_dim.y)
83
0
        ThrowRDE("Offset + size is larger than height of image");
84
0
      for (uint32_t y = area.offset; y < area.offset + area.size; y++) {
85
0
        for (int x = mOffset.x; x < dim.x + mOffset.x; x++) {
86
          // FIXME: this only samples a single row, not an area.
87
0
          const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
88
0
          const auto hBin = img(y, mOffset.x);
89
0
          localhist(hBin)++;
90
0
        }
91
0
      }
92
0
      totalpixels += area.size * dim.x;
93
0
    }
94
95
    /* Process vertical area */
96
0
    if (area.isVertical) {
97
0
      if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
98
0
          uncropped_dim.x)
99
0
        ThrowRDE("Offset + size is larger than width of image");
100
0
      for (int y = mOffset.y; y < dim.y + mOffset.y; y++) {
101
0
        for (uint32_t x = area.offset; x < area.size + area.offset; x++) {
102
          // FIXME: this only samples a single row, not an area.
103
0
          const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
104
0
          const auto hBin = img(y, area.offset);
105
0
          localhist(hBin)++;
106
0
        }
107
0
      }
108
0
      totalpixels += area.size * dim.y;
109
0
    }
110
0
  }
111
112
0
  blackLevelSeparate = Array2DRef(blackLevelSeparateStorage.data(), 2, 2);
113
0
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
114
115
0
  if (!totalpixels) {
116
0
    for (int& i : blackLevelSeparate1D)
117
0
      i = blackLevel;
118
0
    return;
119
0
  }
120
121
  /* Calculate median value of black areas for each component */
122
  /* Adjust the number of total pixels so it is the same as the median of each
123
   * histogram */
124
0
  totalpixels /= 4 * 2;
125
126
0
  for (int i = 0; i < 4; i++) {
127
0
    const auto localhist = histogram[i];
128
0
    int acc_pixels = localhist(0);
129
0
    int pixel_value = 0;
130
0
    while (acc_pixels <= totalpixels && pixel_value < 65535) {
131
0
      pixel_value++;
132
0
      acc_pixels += localhist(pixel_value);
133
0
    }
134
0
    blackLevelSeparate1D(i) = pixel_value;
135
0
  }
136
137
  /* If this is not a CFA image, we do not use separate blacklevels, use average
138
   */
139
0
  if (!isCFA) {
140
0
    int total = 0;
141
0
    for (int i : blackLevelSeparate1D)
142
0
      total += i;
143
0
    for (int& i : blackLevelSeparate1D)
144
0
      i = (total + 2) >> 2;
145
0
  }
146
0
}
147
148
0
void RawImageDataU16::scaleBlackWhite() {
149
0
  const int skipBorder = 250;
150
0
  int gw = (dim.x - skipBorder) * cpp;
151
0
  if ((blackAreas.empty() && !blackLevelSeparate && blackLevel < 0) ||
152
0
      !whitePoint) { // Estimate
153
0
    int b = 65536;
154
0
    int m = 0;
155
0
    auto img = getU16DataAsCroppedArray2DRef();
156
0
    for (int row = skipBorder; row < (dim.y - skipBorder); row++) {
157
0
      for (int col = skipBorder; col < gw; col++) {
158
0
        uint16_t pixel = img(row, skipBorder + col);
159
0
        b = min(static_cast<int>(pixel), b);
160
0
        m = max(static_cast<int>(pixel), m);
161
0
      }
162
0
    }
163
0
    if (blackLevel < 0)
164
0
      blackLevel = b;
165
0
    if (!whitePoint)
166
0
      whitePoint = m;
167
0
    writeLog(DEBUG_PRIO::INFO,
168
0
             "ISO:%d, Estimated black:%d, Estimated white: %d",
169
0
             metadata.isoSpeed, blackLevel, *whitePoint);
170
0
  }
171
172
  /* Skip, if not needed */
173
0
  if ((blackAreas.empty() && blackLevel == 0 && whitePoint == 65535 &&
174
0
       !blackLevelSeparate) ||
175
0
      dim.area() <= 0)
176
0
    return;
177
178
  /* If filter has not set separate blacklevel, compute or fetch it */
179
0
  if (!blackLevelSeparate)
180
0
    calculateBlackAreas();
181
182
0
  startWorker(RawImageWorker::RawImageWorkerTask::SCALE_VALUES, true);
183
0
}
184
185
0
void RawImageDataU16::scaleValues(int start_y, int end_y) {
186
#ifndef WITH_SSE2
187
188
  return scaleValues_plain(start_y, end_y);
189
190
#else
191
192
0
  int depth_values = *whitePoint - (*blackLevelSeparate)(0, 0);
193
0
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);
194
195
  // Check SSE2
196
0
  if (Cpuid::SSE2() && app_scale < 63) {
197
0
    scaleValues_SSE2(start_y, end_y);
198
0
  } else {
199
0
    scaleValues_plain(start_y, end_y);
200
0
  }
201
202
0
#endif
203
0
}
204
205
#ifdef WITH_SSE2
206
0
void RawImageDataU16::scaleValues_SSE2(int start_y, int end_y) {
207
0
  assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
208
0
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
209
210
0
  int depth_values = *whitePoint - blackLevelSeparate1D(0);
211
0
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);
212
213
  // Scale in 30.2 fp
214
0
  auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
215
  // Half Scale in 18.14 fp
216
0
  auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
217
218
0
  __m128i sseround;
219
0
  __m128i ssesub2;
220
0
  __m128i ssesign;
221
0
  __m128i rand_mul;
222
0
  __m128i rand_mask;
223
0
  __m128i sse_full_scale_fp;
224
0
  __m128i sse_half_scale_fp;
225
226
0
  std::array<uint32_t, 4 * 4> sub_mul;
227
228
  // 10 bit fraction
229
0
  uint32_t mul = static_cast<int>(
230
0
      1024.0F * 65535.0F /
231
0
      static_cast<float>(*whitePoint - blackLevelSeparate1D(mOffset.x & 1)));
232
0
  mul |= (static_cast<int>(
233
0
             1024.0F * 65535.0F /
234
0
             static_cast<float>(*whitePoint -
235
0
                                blackLevelSeparate1D((mOffset.x + 1) & 1))))
236
0
         << 16;
237
0
  uint32_t b = blackLevelSeparate1D(mOffset.x & 1) |
238
0
               (blackLevelSeparate1D((mOffset.x + 1) & 1) << 16);
239
240
0
  for (int i = 0; i < 4; i++) {
241
0
    sub_mul[i] = b;       // Subtract even lines
242
0
    sub_mul[4 + i] = mul; // Multiply even lines
243
0
  }
244
245
0
  mul = static_cast<int>(
246
0
      1024.0F * 65535.0F /
247
0
      static_cast<float>(*whitePoint -
248
0
                         blackLevelSeparate1D(2 + (mOffset.x & 1))));
249
0
  mul |= (static_cast<int>(
250
0
             1024.0F * 65535.0F /
251
0
             static_cast<float>(*whitePoint - blackLevelSeparate1D(
252
0
                                                  2 + ((mOffset.x + 1) & 1)))))
253
0
         << 16;
254
0
  b = blackLevelSeparate1D(2 + (mOffset.x & 1)) |
255
0
      (blackLevelSeparate1D(2 + ((mOffset.x + 1) & 1)) << 16);
256
257
0
  for (int i = 0; i < 4; i++) {
258
0
    sub_mul[8 + i] = b;    // Subtract odd lines
259
0
    sub_mul[12 + i] = mul; // Multiply odd lines
260
0
  }
261
262
0
  sseround = _mm_set_epi32(512, 512, 512, 512);
263
0
  ssesub2 = _mm_set_epi32(32768, 32768, 32768, 32768);
264
0
  ssesign = _mm_set_epi32(0x80008000, 0x80008000, 0x80008000, 0x80008000);
265
0
  sse_full_scale_fp = _mm_set1_epi32(full_scale_fp | (full_scale_fp << 16));
266
0
  sse_half_scale_fp = _mm_set1_epi32(half_scale_fp >> 4);
267
268
0
  if (mDitherScale) {
269
0
    rand_mul = _mm_set1_epi32(0x4d9f1d32);
270
0
  } else {
271
0
    rand_mul = _mm_set1_epi32(0);
272
0
  }
273
0
  rand_mask = _mm_set1_epi32(0x00ff00ff); // 8 random bits
274
275
0
  Array2DRef<uint16_t> out(getU16DataAsUncroppedArray2DRef());
276
277
0
  for (int y = start_y; y < end_y; y++) {
278
0
    __m128i sserandom;
279
0
    if (mDitherScale) {
280
0
      sserandom =
281
0
          _mm_set_epi32(dim.x * 1676 + y * 18000, dim.x * 2342 + y * 34311,
282
0
                        dim.x * 4272 + y * 12123, dim.x * 1234 + y * 23464);
283
0
    } else {
284
0
      sserandom = _mm_setzero_si128();
285
0
    }
286
0
    __m128i ssescale;
287
0
    __m128i ssesub;
288
0
    if (((y + mOffset.y) & 1) == 0) {
289
0
      ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[0]));
290
0
      ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[4]));
291
0
    } else {
292
0
      ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[8]));
293
0
      ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[12]));
294
0
    }
295
296
0
    for (int x = 0; x < static_cast<int>(roundDown(uncropped_dim.x, 8));
297
0
         x += 8) {
298
0
      __m128i pix_high;
299
0
      __m128i temp;
300
0
      __m128i pix_low =
301
0
          _mm_load_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)));
302
      // Subtract black
303
0
      pix_low = _mm_subs_epu16(pix_low, ssesub);
304
      // Multiply the two unsigned shorts and combine it to 32 bit result
305
0
      pix_high = _mm_mulhi_epu16(pix_low, ssescale);
306
0
      temp = _mm_mullo_epi16(pix_low, ssescale);
307
0
      pix_low = _mm_unpacklo_epi16(temp, pix_high);
308
0
      pix_high = _mm_unpackhi_epi16(temp, pix_high);
309
      // Add rounder
310
0
      pix_low = _mm_add_epi32(pix_low, sseround);
311
0
      pix_high = _mm_add_epi32(pix_high, sseround);
312
313
0
      sserandom = _mm_xor_si128(_mm_mulhi_epi16(sserandom, rand_mul),
314
0
                                _mm_mullo_epi16(sserandom, rand_mul));
315
0
      __m128i rand_masked =
316
0
          _mm_and_si128(sserandom, rand_mask); // Get 8 random bits
317
0
      rand_masked = _mm_mullo_epi16(rand_masked, sse_full_scale_fp);
318
319
0
      __m128i zero = _mm_setzero_si128();
320
0
      __m128i rand_lo = _mm_sub_epi32(sse_half_scale_fp,
321
0
                                      _mm_unpacklo_epi16(rand_masked, zero));
322
0
      __m128i rand_hi = _mm_sub_epi32(sse_half_scale_fp,
323
0
                                      _mm_unpackhi_epi16(rand_masked, zero));
324
325
0
      pix_low = _mm_add_epi32(pix_low, rand_lo);
326
0
      pix_high = _mm_add_epi32(pix_high, rand_hi);
327
328
      // Shift down
329
0
      pix_low = _mm_srai_epi32(pix_low, 10);
330
0
      pix_high = _mm_srai_epi32(pix_high, 10);
331
      // Subtract to avoid clipping
332
0
      pix_low = _mm_sub_epi32(pix_low, ssesub2);
333
0
      pix_high = _mm_sub_epi32(pix_high, ssesub2);
334
      // Pack
335
0
      pix_low = _mm_packs_epi32(pix_low, pix_high);
336
      // Shift sign off
337
0
      pix_low = _mm_xor_si128(pix_low, ssesign);
338
0
      _mm_store_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)),
339
0
                      pix_low);
340
0
    }
341
0
  }
342
0
}
343
#endif
344
345
0
void RawImageDataU16::scaleValues_plain(int start_y, int end_y) {
346
0
  const CroppedArray2DRef<uint16_t> img(getU16DataAsCroppedArray2DRef());
347
348
0
  assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
349
0
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
350
351
0
  int depth_values = *whitePoint - blackLevelSeparate1D(0);
352
0
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);
353
354
  // Scale in 30.2 fp
355
0
  auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
356
  // Half Scale in 18.14 fp
357
0
  auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
358
359
  // Not SSE2
360
0
  int gw = dim.x * cpp;
361
0
  std::array<int, 4> mul;
362
0
  std::array<int, 4> sub;
363
0
  for (int i = 0; i < 4; i++) {
364
0
    int v = i;
365
0
    if ((mOffset.x & 1) != 0)
366
0
      v ^= 1;
367
0
    if ((mOffset.y & 1) != 0)
368
0
      v ^= 2;
369
0
    mul[i] = static_cast<int>(
370
0
        16384.0F * 65535.0F /
371
0
        static_cast<float>(*whitePoint - blackLevelSeparate1D(v)));
372
0
    sub[i] = blackLevelSeparate1D(v);
373
0
  }
374
0
  for (int y = start_y; y < end_y; y++) {
375
0
    int v = dim.x + y * 36969;
376
0
    for (int x = 0; x < gw; x++) {
377
0
      int rand;
378
0
      if (mDitherScale) {
379
0
        v = 18000 * (v & 65535) + (v >> 16);
380
0
        rand = half_scale_fp - (full_scale_fp * (v & 2047));
381
0
      } else {
382
0
        rand = 0;
383
0
      }
384
0
      uint16_t& pixel = img(y, x);
385
0
      pixel = clampBits(((pixel - sub[(2 * (y & 1)) + (x & 1)]) *
386
0
                             mul[(2 * (y & 1)) + (x & 1)] +
387
0
                         8192 + rand) >>
388
0
                            14,
389
0
                        16);
390
0
    }
391
0
  }
392
0
}
393
394
/* This performs a 4 way interpolated pixel */
395
/* The value is interpolated from the 4 closest valid pixels in */
396
/* the horizontal and vertical direction. Pixels found further away */
397
/* are weighed less */
398
399
0
void RawImageDataU16::fixBadPixel(uint32_t x, uint32_t y, int component) {
400
0
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
401
402
0
  array<int, 4> values;
403
0
  array<int, 4> dist;
404
0
  array<int, 4> weight;
405
406
0
  values.fill(-1);
407
0
  dist.fill(0);
408
0
  weight.fill(0);
409
410
0
  const auto bad =
411
0
      Array2DRef(mBadPixelMap.data(), mBadPixelMapPitch, uncropped_dim.y);
412
0
  int step = isCFA ? 2 : 1;
413
414
  // Find pixel to the left
415
0
  int x_find = static_cast<int>(x) - step;
416
0
  int curr = 0;
417
0
  while (x_find >= 0 && values[curr] < 0) {
418
0
    if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
419
0
      values[curr] = img(y, x_find + component);
420
0
      dist[curr] = static_cast<int>(x) - x_find;
421
0
    }
422
0
    x_find -= step;
423
0
  }
424
  // Find pixel to the right
425
0
  x_find = static_cast<int>(x) + step;
426
0
  curr = 1;
427
0
  while (x_find < uncropped_dim.x && values[curr] < 0) {
428
0
    if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
429
0
      values[curr] = img(y, x_find + component);
430
0
      dist[curr] = x_find - static_cast<int>(x);
431
0
    }
432
0
    x_find += step;
433
0
  }
434
435
  // Find pixel upwards
436
0
  int y_find = static_cast<int>(y) - step;
437
0
  curr = 2;
438
0
  while (y_find >= 0 && values[curr] < 0) {
439
0
    if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
440
0
      values[curr] = img(y_find, x + component);
441
0
      dist[curr] = static_cast<int>(y) - y_find;
442
0
    }
443
0
    y_find -= step;
444
0
  }
445
  // Find pixel downwards
446
0
  y_find = static_cast<int>(y) + step;
447
0
  curr = 3;
448
0
  while (y_find < uncropped_dim.y && values[curr] < 0) {
449
0
    if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
450
0
      values[curr] = img(y_find, x + component);
451
0
      dist[curr] = y_find - static_cast<int>(y);
452
0
    }
453
0
    y_find += step;
454
0
  }
455
456
  // Find x weights
457
0
  int total_dist_x = dist[0] + dist[1];
458
459
0
  int total_shifts = 7;
460
0
  if (total_dist_x) {
461
0
    weight[0] = dist[0] ? (total_dist_x - dist[0]) * 256 / total_dist_x : 0;
462
0
    weight[1] = 256 - weight[0];
463
0
    total_shifts++;
464
0
  }
465
466
  // Find y weights
467
0
  if (int total_dist_y = dist[2] + dist[3]; total_dist_y) {
468
0
    weight[2] = dist[2] ? (total_dist_y - dist[2]) * 256 / total_dist_y : 0;
469
0
    weight[3] = 256 - weight[2];
470
0
    total_shifts++;
471
0
  }
472
473
0
  int total_pixel = 0;
474
0
  for (int i = 0; i < 4; i++)
475
0
    if (values[i] >= 0)
476
0
      total_pixel += values[i] * weight[i];
477
478
0
  total_pixel >>= total_shifts;
479
0
  img(y, x + component) = clampBits(total_pixel, 16);
480
481
  /* Process other pixels - could be done inline, since we have the weights */
482
0
  if (cpp > 1 && component == 0)
483
0
    for (int i = 1; i < cpp; i++)
484
0
      fixBadPixel(x, y, i);
485
0
}
486
487
// TODO: Could be done with SSE2
488
11.0k
void RawImageDataU16::doLookup(int start_y, int end_y) {
489
11.0k
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
490
491
11.0k
  if (table->ntables == 1) {
492
9.76k
    if (table->dither) {
493
9.73k
      int gw = uncropped_dim.x * cpp;
494
9.73k
      const auto t = table->getTable(0);
495
78.3k
      for (int y = start_y; y < end_y; y++) {
496
68.6k
        uint32_t v = (uncropped_dim.x + y * 13) ^ 0x45694584;
497
1.00M
        for (int x = 0; x < gw; x++) {
498
935k
          uint16_t p = img(y, x);
499
935k
          uint32_t base = t(2 * p + 0);
500
935k
          uint32_t delta = t(2 * p + 1);
501
935k
          v = 15700 * (v & 65535) + (v >> 16);
502
935k
          uint32_t pix = base + ((delta * (v & 2047) + 1024) >> 12);
503
935k
          img(y, x) = clampBits(pix, 16);
504
935k
        }
505
68.6k
      }
506
9.73k
      return;
507
9.73k
    }
508
509
30
    int gw = uncropped_dim.x * cpp;
510
30
    const auto t = table->getTable(0);
511
30
    for (int y = start_y; y < end_y; y++) {
512
0
      for (int x = 0; x < gw; x++) {
513
0
        img(y, x) = t(img(y, x));
514
0
      }
515
0
    }
516
30
    return;
517
9.76k
  }
518
1.28k
  ThrowRDE("Table lookup with multiple components not implemented");
519
11.0k
}
520
521
} // namespace rawspeed