/src/librawspeed/src/librawspeed/common/RawImageDataU16.cpp

Source (jump to first uncovered line)
/*
    RawSpeed - RAW file decoder.

    Copyright (C) 2009-2014 Klaus Post

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "rawspeedconfig.h"
#include "common/RawImage.h"
#include "adt/Array1DRef.h"
#include "adt/Array2DRef.h"
#include "adt/Bit.h"
#include "adt/Casts.h"
#include "adt/CroppedArray2DRef.h"
#include "adt/Point.h"
#include "common/Common.h"
#include "common/TableLookUp.h"
#include "decoders/RawDecoderException.h"
#include "metadata/BlackArea.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>

#ifdef WITH_SSE2
#include "common/CpuFeatures.h"
#include <emmintrin.h>
#endif

using std::array;
using std::max;
using std::min;
using std::vector;

namespace rawspeed {

RawImageDataU16::RawImageDataU16() {
  dataType = RawImageType::UINT16;
  bpp = sizeof(uint16_t);
}

RawImageDataU16::RawImageDataU16(const iPoint2D& _dim, uint32_t _cpp)
    : RawImageData(RawImageType::UINT16, _dim, sizeof(uint16_t), _cpp) {}

void RawImageDataU16::calculateBlackAreas() {
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();

  std::vector<uint16_t> histogramStorage;
  auto histogram = Array2DRef<uint16_t>::create(histogramStorage, 65536, 4);

  for (int row = 0; row != histogram.height(); ++row) {
    for (int col = 0; col != histogram.width(); ++col) {
      histogram(row, col) = 0;
    }
  }

  int totalpixels = 0;

  for (auto area : blackAreas) {
    /* Make sure area sizes are multiple of two,
       so we have the same amount of pixels for each CFA group */
    area.size = area.size - (area.size & 1);

    /* Process horizontal area */
    if (!area.isVertical) {
      if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
          uncropped_dim.y)
        ThrowRDE("Offset + size is larger than height of image");
      for (uint32_t y = area.offset; y < area.offset + area.size; y++) {
        for (int x = mOffset.x; x < dim.x + mOffset.x; x++) {
          // FIXME: this only samples a single row, not an area.
          const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
          const auto hBin = img(y, mOffset.x);
          localhist(hBin)++;
        }
      }
      totalpixels += area.size * dim.x;
    }

    /* Process vertical area */
    if (area.isVertical) {
      if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
          uncropped_dim.x)
        ThrowRDE("Offset + size is larger than width of image");
      for (int y = mOffset.y; y < dim.y + mOffset.y; y++) {
        for (uint32_t x = area.offset; x < area.size + area.offset; x++) {
          // FIXME: this only samples a single row, not an area.
          const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
          const auto hBin = img(y, area.offset);
          localhist(hBin)++;
        }
      }
      totalpixels += area.size * dim.y;
    }
  }

  blackLevelSeparate = Array2DRef(blackLevelSeparateStorage.data(), 2, 2);
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();

  if (!totalpixels) {
    for (int& i : blackLevelSeparate1D)
      i = blackLevel;
    return;
  }

  /* Calculate median value of black areas for each component */
  /* Adjust the number of total pixels so it is the same as the median of each
   * histogram */
  totalpixels /= 4 * 2;

  for (int i = 0; i < 4; i++) {
    const auto localhist = histogram[i];
    int acc_pixels = localhist(0);
    int pixel_value = 0;
    while (acc_pixels <= totalpixels && pixel_value < 65535) {
      pixel_value++;
      acc_pixels += localhist(pixel_value);
    }
    blackLevelSeparate1D(i) = pixel_value;
  }

  /* If this is not a CFA image, we do not use separate blacklevels, use average
   */
  if (!isCFA) {
    int total = 0;
    for (int i : blackLevelSeparate1D)
      total += i;
    for (int& i : blackLevelSeparate1D)
      i = (total + 2) >> 2;
  }
}

void RawImageDataU16::scaleBlackWhite() {
  const int skipBorder = 250;
  int gw = (dim.x - skipBorder) * cpp;
  if ((blackAreas.empty() && !blackLevelSeparate && blackLevel < 0) ||
      !whitePoint) { // Estimate
    int b = 65536;
    int m = 0;
    auto img = getU16DataAsCroppedArray2DRef();
    for (int row = skipBorder; row < (dim.y - skipBorder); row++) {
      for (int col = skipBorder; col < gw; col++) {
        uint16_t pixel = img(row, skipBorder + col);
        b = min(static_cast<int>(pixel), b);
        m = max(static_cast<int>(pixel), m);
      }
    }
    if (blackLevel < 0)
      blackLevel = b;
    if (!whitePoint)
      whitePoint = m;
    writeLog(DEBUG_PRIO::INFO,
             "ISO:%d, Estimated black:%d, Estimated white: %d",
             metadata.isoSpeed, blackLevel, *whitePoint);
  }

  /* Skip, if not needed */
  if ((blackAreas.empty() && blackLevel == 0 && whitePoint == 65535 &&
       !blackLevelSeparate) ||
      dim.area() <= 0)
    return;

  /* If filter has not set separate blacklevel, compute or fetch it */
  if (!blackLevelSeparate)
    calculateBlackAreas();

  startWorker(RawImageWorker::RawImageWorkerTask::SCALE_VALUES, true);
}

void RawImageDataU16::scaleValues(int start_y, int end_y) {
#ifndef WITH_SSE2

  return scaleValues_plain(start_y, end_y);

#else

  int depth_values = *whitePoint - (*blackLevelSeparate)(0, 0);
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);

  // Check SSE2
  if (Cpuid::SSE2() && app_scale < 63) {
    scaleValues_SSE2(start_y, end_y);
  } else {
    scaleValues_plain(start_y, end_y);
  }

#endif
}

#ifdef WITH_SSE2
void RawImageDataU16::scaleValues_SSE2(int start_y, int end_y) {
  assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();

  int depth_values = *whitePoint - blackLevelSeparate1D(0);
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);

  // Scale in 30.2 fp
  auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
  // Half Scale in 18.14 fp
  auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);

  __m128i sseround;
  __m128i ssesub2;
  __m128i ssesign;
  __m128i rand_mul;
  __m128i rand_mask;
  __m128i sse_full_scale_fp;
  __m128i sse_half_scale_fp;

  std::array<uint32_t, 4 * 4> sub_mul;

  // 10 bit fraction
  uint32_t mul = static_cast<int>(
      1024.0F * 65535.0F /
      static_cast<float>(*whitePoint - blackLevelSeparate1D(mOffset.x & 1)));
  mul |= (static_cast<int>(
             1024.0F * 65535.0F /
             static_cast<float>(*whitePoint -
                                blackLevelSeparate1D((mOffset.x + 1) & 1))))
         << 16;
  uint32_t b = blackLevelSeparate1D(mOffset.x & 1) |
               (blackLevelSeparate1D((mOffset.x + 1) & 1) << 16);

  for (int i = 0; i < 4; i++) {
    sub_mul[i] = b;       // Subtract even lines
    sub_mul[4 + i] = mul; // Multiply even lines
  }

  mul = static_cast<int>(
      1024.0F * 65535.0F /
      static_cast<float>(*whitePoint -
                         blackLevelSeparate1D(2 + (mOffset.x & 1))));
  mul |= (static_cast<int>(
             1024.0F * 65535.0F /
             static_cast<float>(*whitePoint - blackLevelSeparate1D(
                                                  2 + ((mOffset.x + 1) & 1)))))
         << 16;
  b = blackLevelSeparate1D(2 + (mOffset.x & 1)) |
      (blackLevelSeparate1D(2 + ((mOffset.x + 1) & 1)) << 16);

  for (int i = 0; i < 4; i++) {
    sub_mul[8 + i] = b;    // Subtract odd lines
    sub_mul[12 + i] = mul; // Multiply odd lines
  }

  sseround = _mm_set_epi32(512, 512, 512, 512);
  ssesub2 = _mm_set_epi32(32768, 32768, 32768, 32768);
  ssesign = _mm_set_epi32(0x80008000, 0x80008000, 0x80008000, 0x80008000);
  sse_full_scale_fp = _mm_set1_epi32(full_scale_fp | (full_scale_fp << 16));
  sse_half_scale_fp = _mm_set1_epi32(half_scale_fp >> 4);

  if (mDitherScale) {
    rand_mul = _mm_set1_epi32(0x4d9f1d32);
  } else {
    rand_mul = _mm_set1_epi32(0);
  }
  rand_mask = _mm_set1_epi32(0x00ff00ff); // 8 random bits

  Array2DRef<uint16_t> out(getU16DataAsUncroppedArray2DRef());

  for (int y = start_y; y < end_y; y++) {
    __m128i sserandom;
    if (mDitherScale) {
      sserandom =
          _mm_set_epi32(dim.x * 1676 + y * 18000, dim.x * 2342 + y * 34311,
                        dim.x * 4272 + y * 12123, dim.x * 1234 + y * 23464);
    } else {
      sserandom = _mm_setzero_si128();
    }
    __m128i ssescale;
    __m128i ssesub;
    if (((y + mOffset.y) & 1) == 0) {
      ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[0]));
      ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[4]));
    } else {
      ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[8]));
      ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[12]));
    }

    for (int x = 0; x < static_cast<int>(roundDown(uncropped_dim.x, 8));
         x += 8) {
      __m128i pix_high;
      __m128i temp;
      __m128i pix_low =
          _mm_load_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)));
      // Subtract black
      pix_low = _mm_subs_epu16(pix_low, ssesub);
      // Multiply the two unsigned shorts and combine it to 32 bit result
      pix_high = _mm_mulhi_epu16(pix_low, ssescale);
      temp = _mm_mullo_epi16(pix_low, ssescale);
      pix_low = _mm_unpacklo_epi16(temp, pix_high);
      pix_high = _mm_unpackhi_epi16(temp, pix_high);
      // Add rounder
      pix_low = _mm_add_epi32(pix_low, sseround);
      pix_high = _mm_add_epi32(pix_high, sseround);

      sserandom = _mm_xor_si128(_mm_mulhi_epi16(sserandom, rand_mul),
                                _mm_mullo_epi16(sserandom, rand_mul));
      __m128i rand_masked =
          _mm_and_si128(sserandom, rand_mask); // Get 8 random bits
      rand_masked = _mm_mullo_epi16(rand_masked, sse_full_scale_fp);

      __m128i zero = _mm_setzero_si128();
      __m128i rand_lo = _mm_sub_epi32(sse_half_scale_fp,
                                      _mm_unpacklo_epi16(rand_masked, zero));
      __m128i rand_hi = _mm_sub_epi32(sse_half_scale_fp,
                                      _mm_unpackhi_epi16(rand_masked, zero));

      pix_low = _mm_add_epi32(pix_low, rand_lo);
      pix_high = _mm_add_epi32(pix_high, rand_hi);

      // Shift down
      pix_low = _mm_srai_epi32(pix_low, 10);
      pix_high = _mm_srai_epi32(pix_high, 10);
      // Subtract to avoid clipping
      pix_low = _mm_sub_epi32(pix_low, ssesub2);
      pix_high = _mm_sub_epi32(pix_high, ssesub2);
      // Pack
      pix_low = _mm_packs_epi32(pix_low, pix_high);
      // Shift sign off
      pix_low = _mm_xor_si128(pix_low, ssesign);
      _mm_store_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)),
                      pix_low);
    }
  }
}
#endif

void RawImageDataU16::scaleValues_plain(int start_y, int end_y) {
  const CroppedArray2DRef<uint16_t> img(getU16DataAsCroppedArray2DRef());

  assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
  auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();

  int depth_values = *whitePoint - blackLevelSeparate1D(0);
  float app_scale = 65535.0F / implicit_cast<float>(depth_values);

  // Scale in 30.2 fp
  auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
  // Half Scale in 18.14 fp
  auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);

  // Not SSE2
  int gw = dim.x * cpp;
  std::array<int, 4> mul;
  std::array<int, 4> sub;
  for (int i = 0; i < 4; i++) {
    int v = i;
    if ((mOffset.x & 1) != 0)
      v ^= 1;
    if ((mOffset.y & 1) != 0)
      v ^= 2;
    mul[i] = static_cast<int>(
        16384.0F * 65535.0F /
        static_cast<float>(*whitePoint - blackLevelSeparate1D(v)));
    sub[i] = blackLevelSeparate1D(v);
  }
  for (int y = start_y; y < end_y; y++) {
    int v = dim.x + y * 36969;
    for (int x = 0; x < gw; x++) {
      int rand;
      if (mDitherScale) {
        v = 18000 * (v & 65535) + (v >> 16);
        rand = half_scale_fp - (full_scale_fp * (v & 2047));
      } else {
        rand = 0;
      }
      uint16_t& pixel = img(y, x);
      pixel = clampBits(((pixel - sub[(2 * (y & 1)) + (x & 1)]) *
                             mul[(2 * (y & 1)) + (x & 1)] +
                         8192 + rand) >>
                            14,
                        16);
    }
  }
}

/* This performs a 4 way interpolated pixel */
/* The value is interpolated from the 4 closest valid pixels in */
/* the horizontal and vertical direction. Pixels found further away */
/* are weighed less */

void RawImageDataU16::fixBadPixel(uint32_t x, uint32_t y, int component) {
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();

  array<int, 4> values;
  array<int, 4> dist;
  array<int, 4> weight;

  values.fill(-1);
  dist.fill(0);
  weight.fill(0);

  const auto bad =
      Array2DRef(mBadPixelMap.data(), mBadPixelMapPitch, uncropped_dim.y);
  int step = isCFA ? 2 : 1;

  // Find pixel to the left
  int x_find = static_cast<int>(x) - step;
  int curr = 0;
  while (x_find >= 0 && values[curr] < 0) {
    if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
      values[curr] = img(y, x_find + component);
      dist[curr] = static_cast<int>(x) - x_find;
    }
    x_find -= step;
  }
  // Find pixel to the right
  x_find = static_cast<int>(x) + step;
  curr = 1;
  while (x_find < uncropped_dim.x && values[curr] < 0) {
    if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
      values[curr] = img(y, x_find + component);
      dist[curr] = x_find - static_cast<int>(x);
    }
    x_find += step;
  }

  // Find pixel upwards
  int y_find = static_cast<int>(y) - step;
  curr = 2;
  while (y_find >= 0 && values[curr] < 0) {
    if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
      values[curr] = img(y_find, x + component);
      dist[curr] = static_cast<int>(y) - y_find;
    }
    y_find -= step;
  }
  // Find pixel downwards
  y_find = static_cast<int>(y) + step;
  curr = 3;
  while (y_find < uncropped_dim.y && values[curr] < 0) {
    if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
      values[curr] = img(y_find, x + component);
      dist[curr] = y_find - static_cast<int>(y);
    }
    y_find += step;
  }

  // Find x weights
  int total_dist_x = dist[0] + dist[1];

  int total_shifts = 7;
  if (total_dist_x) {
    weight[0] = dist[0] ? (total_dist_x - dist[0]) * 256 / total_dist_x : 0;
    weight[1] = 256 - weight[0];
    total_shifts++;
  }

  // Find y weights
  if (int total_dist_y = dist[2] + dist[3]; total_dist_y) {
    weight[2] = dist[2] ? (total_dist_y - dist[2]) * 256 / total_dist_y : 0;
    weight[3] = 256 - weight[2];
    total_shifts++;
  }

  int total_pixel = 0;
  for (int i = 0; i < 4; i++)
    if (values[i] >= 0)
      total_pixel += values[i] * weight[i];

  total_pixel >>= total_shifts;
  img(y, x + component) = clampBits(total_pixel, 16);

  /* Process other pixels - could be done inline, since we have the weights */
  if (cpp > 1 && component == 0)
    for (int i = 1; i < cpp; i++)
      fixBadPixel(x, y, i);
}

// TODO: Could be done with SSE2
void RawImageDataU16::doLookup(int start_y, int end_y) {
  const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();

  if (table->ntables == 1) {
    if (table->dither) {
      int gw = uncropped_dim.x * cpp;
      const auto t = table->getTable(0);
      for (int y = start_y; y < end_y; y++) {
        uint32_t v = (uncropped_dim.x + y * 13) ^ 0x45694584;
        for (int x = 0; x < gw; x++) {
          uint16_t p = img(y, x);
          uint32_t base = t(2 * p + 0);
          uint32_t delta = t(2 * p + 1);
          v = 15700 * (v & 65535) + (v >> 16);
          uint32_t pix = base + ((delta * (v & 2047) + 1024) >> 12);
          img(y, x) = clampBits(pix, 16);
        }
      }
      return;
    }

    int gw = uncropped_dim.x * cpp;
    const auto t = table->getTable(0);
    for (int y = start_y; y < end_y; y++) {
      for (int x = 0; x < gw; x++) {
        img(y, x) = t(img(y, x));
      }
    }
    return;
  }
  ThrowRDE("Table lookup with multiple components not implemented");
}

} // namespace rawspeed

Coverage Report

Created: 2024-09-08 06:18

Line	Count	Source (jump to first uncovered line)
1		/*
2		RawSpeed - RAW file decoder.
3
4		Copyright (C) 2009-2014 Klaus Post
5
6		This library is free software; you can redistribute it and/or
7		modify it under the terms of the GNU Lesser General Public
8		License as published by the Free Software Foundation; either
9		version 2 of the License, or (at your option) any later version.
10
11		This library is distributed in the hope that it will be useful,
12		but WITHOUT ANY WARRANTY; without even the implied warranty of
13		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		Lesser General Public License for more details.
15
16		You should have received a copy of the GNU Lesser General Public
17		License along with this library; if not, write to the Free Software
18		Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "rawspeedconfig.h"
22		#include "common/RawImage.h"
23		#include "adt/Array1DRef.h"
24		#include "adt/Array2DRef.h"
25		#include "adt/Bit.h"
26		#include "adt/Casts.h"
27		#include "adt/CroppedArray2DRef.h"
28		#include "adt/Point.h"
29		#include "common/Common.h"
30		#include "common/TableLookUp.h"
31		#include "decoders/RawDecoderException.h"
32		#include "metadata/BlackArea.h"
33		#include <algorithm>
34		#include <array>
35		#include <cassert>
36		#include <cstdint>
37		#include <memory>
38		#include <vector>
39
40		#ifdef WITH_SSE2
41		#include "common/CpuFeatures.h"
42		#include <emmintrin.h>
43		#endif
44
45		using std::array;
46		using std::max;
47		using std::min;
48		using std::vector;
49
50		namespace rawspeed {
51
52	113k	RawImageDataU16::RawImageDataU16() {
53	113k	dataType = RawImageType::UINT16;
54	113k	bpp = sizeof(uint16_t);
55	113k	}
56
57		RawImageDataU16::RawImageDataU16(const iPoint2D& _dim, uint32_t _cpp)
58	0	: RawImageData(RawImageType::UINT16, _dim, sizeof(uint16_t), _cpp) {}
59
60	0	void RawImageDataU16::calculateBlackAreas() {
61	0	const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
62
63	0	std::vector<uint16_t> histogramStorage;
64	0	auto histogram = Array2DRef<uint16_t>::create(histogramStorage, 65536, 4);
65
66	0	for (int row = 0; row != histogram.height(); ++row) {
67	0	for (int col = 0; col != histogram.width(); ++col) {
68	0	histogram(row, col) = 0;
69	0	}
70	0	}
71
72	0	int totalpixels = 0;
73
74	0	for (auto area : blackAreas) {
75		/* Make sure area sizes are multiple of two,
76		so we have the same amount of pixels for each CFA group */
77	0	area.size = area.size - (area.size & 1);
78
79		/* Process horizontal area */
80	0	if (!area.isVertical) {
81	0	if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
82	0	uncropped_dim.y)
83	0	ThrowRDE("Offset + size is larger than height of image");
84	0	for (uint32_t y = area.offset; y < area.offset + area.size; y++) {
85	0	for (int x = mOffset.x; x < dim.x + mOffset.x; x++) {
86		// FIXME: this only samples a single row, not an area.
87	0	const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
88	0	const auto hBin = img(y, mOffset.x);
89	0	localhist(hBin)++;
90	0	}
91	0	}
92	0	totalpixels += area.size * dim.x;
93	0	}
94
95		/* Process vertical area */
96	0	if (area.isVertical) {
97	0	if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
98	0	uncropped_dim.x)
99	0	ThrowRDE("Offset + size is larger than width of image");
100	0	for (int y = mOffset.y; y < dim.y + mOffset.y; y++) {
101	0	for (uint32_t x = area.offset; x < area.size + area.offset; x++) {
102		// FIXME: this only samples a single row, not an area.
103	0	const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
104	0	const auto hBin = img(y, area.offset);
105	0	localhist(hBin)++;
106	0	}
107	0	}
108	0	totalpixels += area.size * dim.y;
109	0	}
110	0	}
111
112	0	blackLevelSeparate = Array2DRef(blackLevelSeparateStorage.data(), 2, 2);
113	0	auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
114
115	0	if (!totalpixels) {
116	0	for (int& i : blackLevelSeparate1D)
117	0	i = blackLevel;
118	0	return;
119	0	}
120
121		/* Calculate median value of black areas for each component */
122		/* Adjust the number of total pixels so it is the same as the median of each
123		* histogram */
124	0	totalpixels /= 4 * 2;
125
126	0	for (int i = 0; i < 4; i++) {
127	0	const auto localhist = histogram[i];
128	0	int acc_pixels = localhist(0);
129	0	int pixel_value = 0;
130	0	while (acc_pixels <= totalpixels && pixel_value < 65535) {
131	0	pixel_value++;
132	0	acc_pixels += localhist(pixel_value);
133	0	}
134	0	blackLevelSeparate1D(i) = pixel_value;
135	0	}
136
137		/* If this is not a CFA image, we do not use separate blacklevels, use average
138		*/
139	0	if (!isCFA) {
140	0	int total = 0;
141	0	for (int i : blackLevelSeparate1D)
142	0	total += i;
143	0	for (int& i : blackLevelSeparate1D)
144	0	i = (total + 2) >> 2;
145	0	}
146	0	}
147
148	0	void RawImageDataU16::scaleBlackWhite() {
149	0	const int skipBorder = 250;
150	0	int gw = (dim.x - skipBorder) * cpp;
151	0	if ((blackAreas.empty() && !blackLevelSeparate && blackLevel < 0) \|\|
152	0	!whitePoint) { // Estimate
153	0	int b = 65536;
154	0	int m = 0;
155	0	auto img = getU16DataAsCroppedArray2DRef();
156	0	for (int row = skipBorder; row < (dim.y - skipBorder); row++) {
157	0	for (int col = skipBorder; col < gw; col++) {
158	0	uint16_t pixel = img(row, skipBorder + col);
159	0	b = min(static_cast<int>(pixel), b);
160	0	m = max(static_cast<int>(pixel), m);
161	0	}
162	0	}
163	0	if (blackLevel < 0)
164	0	blackLevel = b;
165	0	if (!whitePoint)
166	0	whitePoint = m;
167	0	writeLog(DEBUG_PRIO::INFO,
168	0	"ISO:%d, Estimated black:%d, Estimated white: %d",
169	0	metadata.isoSpeed, blackLevel, *whitePoint);
170	0	}
171
172		/* Skip, if not needed */
173	0	if ((blackAreas.empty() && blackLevel == 0 && whitePoint == 65535 &&
174	0	!blackLevelSeparate) \|\|
175	0	dim.area() <= 0)
176	0	return;
177
178		/* If filter has not set separate blacklevel, compute or fetch it */
179	0	if (!blackLevelSeparate)
180	0	calculateBlackAreas();
181
182	0	startWorker(RawImageWorker::RawImageWorkerTask::SCALE_VALUES, true);
183	0	}
184
185	0	void RawImageDataU16::scaleValues(int start_y, int end_y) {
186		#ifndef WITH_SSE2
187
188		return scaleValues_plain(start_y, end_y);
189
190		#else
191
192	0	int depth_values = whitePoint - (blackLevelSeparate)(0, 0);
193	0	float app_scale = 65535.0F / implicit_cast<float>(depth_values);
194
195		// Check SSE2
196	0	if (Cpuid::SSE2() && app_scale < 63) {
197	0	scaleValues_SSE2(start_y, end_y);
198	0	} else {
199	0	scaleValues_plain(start_y, end_y);
200	0	}
201
202	0	#endif
203	0	}
204
205		#ifdef WITH_SSE2
206	0	void RawImageDataU16::scaleValues_SSE2(int start_y, int end_y) {
207	0	assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
208	0	auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
209
210	0	int depth_values = *whitePoint - blackLevelSeparate1D(0);
211	0	float app_scale = 65535.0F / implicit_cast<float>(depth_values);
212
213		// Scale in 30.2 fp
214	0	auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
215		// Half Scale in 18.14 fp
216	0	auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
217
218	0	__m128i sseround;
219	0	__m128i ssesub2;
220	0	__m128i ssesign;
221	0	__m128i rand_mul;
222	0	__m128i rand_mask;
223	0	__m128i sse_full_scale_fp;
224	0	__m128i sse_half_scale_fp;
225
226	0	std::array<uint32_t, 4 * 4> sub_mul;
227
228		// 10 bit fraction
229	0	uint32_t mul = static_cast<int>(
230	0	1024.0F * 65535.0F /
231	0	static_cast<float>(*whitePoint - blackLevelSeparate1D(mOffset.x & 1)));
232	0	mul \|= (static_cast<int>(
233	0	1024.0F * 65535.0F /
234	0	static_cast<float>(*whitePoint -
235	0	blackLevelSeparate1D((mOffset.x + 1) & 1))))
236	0	<< 16;
237	0	uint32_t b = blackLevelSeparate1D(mOffset.x & 1) \|
238	0	(blackLevelSeparate1D((mOffset.x + 1) & 1) << 16);
239
240	0	for (int i = 0; i < 4; i++) {
241	0	sub_mul[i] = b; // Subtract even lines
242	0	sub_mul[4 + i] = mul; // Multiply even lines
243	0	}
244
245	0	mul = static_cast<int>(
246	0	1024.0F * 65535.0F /
247	0	static_cast<float>(*whitePoint -
248	0	blackLevelSeparate1D(2 + (mOffset.x & 1))));
249	0	mul \|= (static_cast<int>(
250	0	1024.0F * 65535.0F /
251	0	static_cast<float>(*whitePoint - blackLevelSeparate1D(
252	0	2 + ((mOffset.x + 1) & 1)))))
253	0	<< 16;
254	0	b = blackLevelSeparate1D(2 + (mOffset.x & 1)) \|
255	0	(blackLevelSeparate1D(2 + ((mOffset.x + 1) & 1)) << 16);
256
257	0	for (int i = 0; i < 4; i++) {
258	0	sub_mul[8 + i] = b; // Subtract odd lines
259	0	sub_mul[12 + i] = mul; // Multiply odd lines
260	0	}
261
262	0	sseround = _mm_set_epi32(512, 512, 512, 512);
263	0	ssesub2 = _mm_set_epi32(32768, 32768, 32768, 32768);
264	0	ssesign = _mm_set_epi32(0x80008000, 0x80008000, 0x80008000, 0x80008000);
265	0	sse_full_scale_fp = _mm_set1_epi32(full_scale_fp \| (full_scale_fp << 16));
266	0	sse_half_scale_fp = _mm_set1_epi32(half_scale_fp >> 4);
267
268	0	if (mDitherScale) {
269	0	rand_mul = _mm_set1_epi32(0x4d9f1d32);
270	0	} else {
271	0	rand_mul = _mm_set1_epi32(0);
272	0	}
273	0	rand_mask = _mm_set1_epi32(0x00ff00ff); // 8 random bits
274
275	0	Array2DRef<uint16_t> out(getU16DataAsUncroppedArray2DRef());
276
277	0	for (int y = start_y; y < end_y; y++) {
278	0	__m128i sserandom;
279	0	if (mDitherScale) {
280	0	sserandom =
281	0	_mm_set_epi32(dim.x * 1676 + y * 18000, dim.x * 2342 + y * 34311,
282	0	dim.x * 4272 + y * 12123, dim.x * 1234 + y * 23464);
283	0	} else {
284	0	sserandom = _mm_setzero_si128();
285	0	}
286	0	__m128i ssescale;
287	0	__m128i ssesub;
288	0	if (((y + mOffset.y) & 1) == 0) {
289	0	ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[0]));
290	0	ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[4]));
291	0	} else {
292	0	ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[8]));
293	0	ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[12]));
294	0	}
295
296	0	for (int x = 0; x < static_cast<int>(roundDown(uncropped_dim.x, 8));
297	0	x += 8) {
298	0	__m128i pix_high;
299	0	__m128i temp;
300	0	__m128i pix_low =
301	0	_mm_load_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)));
302		// Subtract black
303	0	pix_low = _mm_subs_epu16(pix_low, ssesub);
304		// Multiply the two unsigned shorts and combine it to 32 bit result
305	0	pix_high = _mm_mulhi_epu16(pix_low, ssescale);
306	0	temp = _mm_mullo_epi16(pix_low, ssescale);
307	0	pix_low = _mm_unpacklo_epi16(temp, pix_high);
308	0	pix_high = _mm_unpackhi_epi16(temp, pix_high);
309		// Add rounder
310	0	pix_low = _mm_add_epi32(pix_low, sseround);
311	0	pix_high = _mm_add_epi32(pix_high, sseround);
312
313	0	sserandom = _mm_xor_si128(_mm_mulhi_epi16(sserandom, rand_mul),
314	0	_mm_mullo_epi16(sserandom, rand_mul));
315	0	__m128i rand_masked =
316	0	_mm_and_si128(sserandom, rand_mask); // Get 8 random bits
317	0	rand_masked = _mm_mullo_epi16(rand_masked, sse_full_scale_fp);
318
319	0	__m128i zero = _mm_setzero_si128();
320	0	__m128i rand_lo = _mm_sub_epi32(sse_half_scale_fp,
321	0	_mm_unpacklo_epi16(rand_masked, zero));
322	0	__m128i rand_hi = _mm_sub_epi32(sse_half_scale_fp,
323	0	_mm_unpackhi_epi16(rand_masked, zero));
324
325	0	pix_low = _mm_add_epi32(pix_low, rand_lo);
326	0	pix_high = _mm_add_epi32(pix_high, rand_hi);
327
328		// Shift down
329	0	pix_low = _mm_srai_epi32(pix_low, 10);
330	0	pix_high = _mm_srai_epi32(pix_high, 10);
331		// Subtract to avoid clipping
332	0	pix_low = _mm_sub_epi32(pix_low, ssesub2);
333	0	pix_high = _mm_sub_epi32(pix_high, ssesub2);
334		// Pack
335	0	pix_low = _mm_packs_epi32(pix_low, pix_high);
336		// Shift sign off
337	0	pix_low = _mm_xor_si128(pix_low, ssesign);
338	0	_mm_store_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)),
339	0	pix_low);
340	0	}
341	0	}
342	0	}
343		#endif
344
345	0	void RawImageDataU16::scaleValues_plain(int start_y, int end_y) {
346	0	const CroppedArray2DRef<uint16_t> img(getU16DataAsCroppedArray2DRef());
347
348	0	assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
349	0	auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
350
351	0	int depth_values = *whitePoint - blackLevelSeparate1D(0);
352	0	float app_scale = 65535.0F / implicit_cast<float>(depth_values);
353
354		// Scale in 30.2 fp
355	0	auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
356		// Half Scale in 18.14 fp
357	0	auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
358
359		// Not SSE2
360	0	int gw = dim.x * cpp;
361	0	std::array<int, 4> mul;
362	0	std::array<int, 4> sub;
363	0	for (int i = 0; i < 4; i++) {
364	0	int v = i;
365	0	if ((mOffset.x & 1) != 0)
366	0	v ^= 1;
367	0	if ((mOffset.y & 1) != 0)
368	0	v ^= 2;
369	0	mul[i] = static_cast<int>(
370	0	16384.0F * 65535.0F /
371	0	static_cast<float>(*whitePoint - blackLevelSeparate1D(v)));
372	0	sub[i] = blackLevelSeparate1D(v);
373	0	}
374	0	for (int y = start_y; y < end_y; y++) {
375	0	int v = dim.x + y * 36969;
376	0	for (int x = 0; x < gw; x++) {
377	0	int rand;
378	0	if (mDitherScale) {
379	0	v = 18000 * (v & 65535) + (v >> 16);
380	0	rand = half_scale_fp - (full_scale_fp * (v & 2047));
381	0	} else {
382	0	rand = 0;
383	0	}
384	0	uint16_t& pixel = img(y, x);
385	0	pixel = clampBits(((pixel - sub[(2 * (y & 1)) + (x & 1)]) *
386	0	mul[(2 * (y & 1)) + (x & 1)] +
387	0	8192 + rand) >>
388	0	14,
389	0	16);
390	0	}
391	0	}
392	0	}
393
394		/* This performs a 4 way interpolated pixel */
395		/* The value is interpolated from the 4 closest valid pixels in */
396		/* the horizontal and vertical direction. Pixels found further away */
397		/* are weighed less */
398
399	0	void RawImageDataU16::fixBadPixel(uint32_t x, uint32_t y, int component) {
400	0	const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
401
402	0	array<int, 4> values;
403	0	array<int, 4> dist;
404	0	array<int, 4> weight;
405
406	0	values.fill(-1);
407	0	dist.fill(0);
408	0	weight.fill(0);
409
410	0	const auto bad =
411	0	Array2DRef(mBadPixelMap.data(), mBadPixelMapPitch, uncropped_dim.y);
412	0	int step = isCFA ? 2 : 1;
413
414		// Find pixel to the left
415	0	int x_find = static_cast<int>(x) - step;
416	0	int curr = 0;
417	0	while (x_find >= 0 && values[curr] < 0) {
418	0	if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
419	0	values[curr] = img(y, x_find + component);
420	0	dist[curr] = static_cast<int>(x) - x_find;
421	0	}
422	0	x_find -= step;
423	0	}
424		// Find pixel to the right
425	0	x_find = static_cast<int>(x) + step;
426	0	curr = 1;
427	0	while (x_find < uncropped_dim.x && values[curr] < 0) {
428	0	if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
429	0	values[curr] = img(y, x_find + component);
430	0	dist[curr] = x_find - static_cast<int>(x);
431	0	}
432	0	x_find += step;
433	0	}
434
435		// Find pixel upwards
436	0	int y_find = static_cast<int>(y) - step;
437	0	curr = 2;
438	0	while (y_find >= 0 && values[curr] < 0) {
439	0	if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
440	0	values[curr] = img(y_find, x + component);
441	0	dist[curr] = static_cast<int>(y) - y_find;
442	0	}
443	0	y_find -= step;
444	0	}
445		// Find pixel downwards
446	0	y_find = static_cast<int>(y) + step;
447	0	curr = 3;
448	0	while (y_find < uncropped_dim.y && values[curr] < 0) {
449	0	if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
450	0	values[curr] = img(y_find, x + component);
451	0	dist[curr] = y_find - static_cast<int>(y);
452	0	}
453	0	y_find += step;
454	0	}
455
456		// Find x weights
457	0	int total_dist_x = dist[0] + dist[1];
458
459	0	int total_shifts = 7;
460	0	if (total_dist_x) {
461	0	weight[0] = dist[0] ? (total_dist_x - dist[0]) * 256 / total_dist_x : 0;
462	0	weight[1] = 256 - weight[0];
463	0	total_shifts++;
464	0	}
465
466		// Find y weights
467	0	if (int total_dist_y = dist[2] + dist[3]; total_dist_y) {
468	0	weight[2] = dist[2] ? (total_dist_y - dist[2]) * 256 / total_dist_y : 0;
469	0	weight[3] = 256 - weight[2];
470	0	total_shifts++;
471	0	}
472
473	0	int total_pixel = 0;
474	0	for (int i = 0; i < 4; i++)
475	0	if (values[i] >= 0)
476	0	total_pixel += values[i] * weight[i];
477
478	0	total_pixel >>= total_shifts;
479	0	img(y, x + component) = clampBits(total_pixel, 16);
480
481		/* Process other pixels - could be done inline, since we have the weights */
482	0	if (cpp > 1 && component == 0)
483	0	for (int i = 1; i < cpp; i++)
484	0	fixBadPixel(x, y, i);
485	0	}
486
487		// TODO: Could be done with SSE2
488	11.0k	void RawImageDataU16::doLookup(int start_y, int end_y) {
489	11.0k	const Array2DRef<uint16_t> img = getU16DataAsUncroppedArray2DRef();
490
491	11.0k	if (table->ntables == 1) {
492	9.76k	if (table->dither) {
493	9.73k	int gw = uncropped_dim.x * cpp;
494	9.73k	const auto t = table->getTable(0);
495	78.3k	for (int y = start_y; y < end_y; y++) {
496	68.6k	uint32_t v = (uncropped_dim.x + y * 13) ^ 0x45694584;
497	1.00M	for (int x = 0; x < gw; x++) {
498	935k	uint16_t p = img(y, x);
499	935k	uint32_t base = t(2 * p + 0);
500	935k	uint32_t delta = t(2 * p + 1);
501	935k	v = 15700 * (v & 65535) + (v >> 16);
502	935k	uint32_t pix = base + ((delta * (v & 2047) + 1024) >> 12);
503	935k	img(y, x) = clampBits(pix, 16);
504	935k	}
505	68.6k	}
506	9.73k	return;
507	9.73k	}
508
509	30	int gw = uncropped_dim.x * cpp;
510	30	const auto t = table->getTable(0);
511	30	for (int y = start_y; y < end_y; y++) {
512	0	for (int x = 0; x < gw; x++) {
513	0	img(y, x) = t(img(y, x));
514	0	}
515	0	}
516	30	return;
517	9.76k	}
518	1.28k	ThrowRDE("Table lookup with multiple components not implemented");
519	11.0k	}
520
521		} // namespace rawspeed