Coverage Report

Created: 2026-02-14 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwebp/src/enc/predictor_enc.c
Line
Count
Source
1
// Copyright 2016 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// Image transform methods for lossless encoder.
11
//
12
// Authors: Vikas Arora (vikaas.arora@gmail.com)
13
//          Jyrki Alakuijala (jyrki@google.com)
14
//          Urvang Joshi (urvang@google.com)
15
//          Vincent Rabaud (vrabaud@google.com)
16
17
#include <assert.h>
18
#include <stdlib.h>
19
#include <string.h>
20
21
#include "src/dsp/lossless.h"
22
#include "src/dsp/lossless_common.h"
23
#include "src/enc/vp8i_enc.h"
24
#include "src/enc/vp8li_enc.h"
25
#include "src/utils/utils.h"
26
#include "src/webp/encode.h"
27
#include "src/webp/format_constants.h"
28
#include "src/webp/types.h"
29
30
23.7M
#define HISTO_SIZE (4 * 256)
31
static const int64_t kSpatialPredictorBias = 15ll << LOG_2_PRECISION_BITS;
32
static const int kPredLowEffort = 11;
33
static const uint32_t kMaskAlpha = 0xff000000;
34
static const int kNumPredModes = 14;
35
36
// Mostly used to reduce code size + readability
37
2.97M
static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
38
971
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
39
40
//------------------------------------------------------------------------------
41
// Methods to calculate Entropy (Shannon).
42
43
// Compute a bias for prediction entropy using a global heuristic to favor
44
// values closer to 0. Hence the final negative sign.
45
// 'exp_val' has a scaling factor of 1/100.
46
static int64_t PredictionCostBias(const uint32_t counts[256], uint64_t weight_0,
47
43.5M
                                  uint64_t exp_val) {
48
43.5M
  const int significant_symbols = 256 >> 4;
49
43.5M
  const uint64_t exp_decay_factor = 6;  // has a scaling factor of 1/10
50
43.5M
  uint64_t bits = (weight_0 * counts[0]) << LOG_2_PRECISION_BITS;
51
43.5M
  int i;
52
43.5M
  exp_val <<= LOG_2_PRECISION_BITS;
53
696M
  for (i = 1; i < significant_symbols; ++i) {
54
652M
    bits += DivRound(exp_val * (counts[i] + counts[256 - i]), 100);
55
652M
    exp_val = DivRound(exp_decay_factor * exp_val, 10);
56
652M
  }
57
43.5M
  return -DivRound((int64_t)bits, 10);
58
43.5M
}
59
60
static int64_t PredictionCostSpatialHistogram(
61
    const uint32_t accumulated[HISTO_SIZE], const uint32_t tile[HISTO_SIZE],
62
7.06M
    int mode, int left_mode, int above_mode) {
63
7.06M
  int i;
64
7.06M
  int64_t retval = 0;
65
35.3M
  for (i = 0; i < 4; ++i) {
66
28.2M
    const uint64_t kExpValue = 94;
67
28.2M
    retval += PredictionCostBias(&tile[i * 256], 1, kExpValue);
68
    // Compute the new cost if 'tile' is added to 'accumulate' but also add the
69
    // cost of the current histogram to guide the spatial predictor selection.
70
    // Basically, favor low entropy, locally and globally.
71
28.2M
    retval += (int64_t)VP8LCombinedShannonEntropy(&tile[i * 256],
72
28.2M
                                                  &accumulated[i * 256]);
73
28.2M
  }
74
  // Favor keeping the areas locally similar.
75
7.06M
  if (mode == left_mode) retval -= kSpatialPredictorBias;
76
7.06M
  if (mode == above_mode) retval -= kSpatialPredictorBias;
77
7.06M
  return retval;
78
7.06M
}
79
80
static WEBP_INLINE void UpdateHisto(uint32_t histo_argb[HISTO_SIZE],
81
4.80G
                                    uint32_t argb) {
82
4.80G
  ++histo_argb[0 * 256 + (argb >> 24)];
83
4.80G
  ++histo_argb[1 * 256 + ((argb >> 16) & 0xff)];
84
4.80G
  ++histo_argb[2 * 256 + ((argb >> 8) & 0xff)];
85
4.80G
  ++histo_argb[3 * 256 + (argb & 0xff)];
86
4.80G
}
87
88
//------------------------------------------------------------------------------
89
// Spatial transform functions.
90
91
static WEBP_INLINE void PredictBatch(int mode, int x_start, int y,
92
                                     int num_pixels, const uint32_t* current,
93
98.9M
                                     const uint32_t* upper, uint32_t* out) {
94
98.9M
  if (x_start == 0) {
95
5.67M
    if (y == 0) {
96
      // ARGB_BLACK.
97
8.91k
      VP8LPredictorsSub[0](current, NULL, 1, out);
98
5.66M
    } else {
99
      // Top one.
100
5.66M
      VP8LPredictorsSub[2](current, upper, 1, out);
101
5.66M
    }
102
5.67M
    ++x_start;
103
5.67M
    ++out;
104
5.67M
    --num_pixels;
105
5.67M
  }
106
98.9M
  if (y == 0) {
107
    // Left one.
108
153k
    VP8LPredictorsSub[1](current + x_start, NULL, num_pixels, out);
109
98.7M
  } else {
110
98.7M
    VP8LPredictorsSub[mode](current + x_start, upper + x_start, num_pixels,
111
98.7M
                            out);
112
98.7M
  }
113
98.9M
}
114
115
#if (WEBP_NEAR_LOSSLESS == 1)
116
0
static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {
117
0
  const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));
118
0
  const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));
119
0
  const int diff_g = abs((int)((p1 >> 8) & 0xff) - (int)((p2 >> 8) & 0xff));
120
0
  const int diff_b = abs((int)(p1 & 0xff) - (int)(p2 & 0xff));
121
0
  return GetMax(GetMax(diff_a, diff_r), GetMax(diff_g, diff_b));
122
0
}
123
124
static int MaxDiffAroundPixel(uint32_t current, uint32_t up, uint32_t down,
125
0
                              uint32_t left, uint32_t right) {
126
0
  const int diff_up = MaxDiffBetweenPixels(current, up);
127
0
  const int diff_down = MaxDiffBetweenPixels(current, down);
128
0
  const int diff_left = MaxDiffBetweenPixels(current, left);
129
0
  const int diff_right = MaxDiffBetweenPixels(current, right);
130
0
  return GetMax(GetMax(diff_up, diff_down), GetMax(diff_left, diff_right));
131
0
}
132
133
0
static uint32_t AddGreenToBlueAndRed(uint32_t argb) {
134
0
  const uint32_t green = (argb >> 8) & 0xff;
135
0
  uint32_t red_blue = argb & 0x00ff00ffu;
136
0
  red_blue += (green << 16) | green;
137
0
  red_blue &= 0x00ff00ffu;
138
0
  return (argb & 0xff00ff00u) | red_blue;
139
0
}
140
141
static void MaxDiffsForRow(int width, int stride, const uint32_t* const argb,
142
0
                           uint8_t* const max_diffs, int used_subtract_green) {
143
0
  uint32_t current, up, down, left, right;
144
0
  int x;
145
0
  if (width <= 2) return;
146
0
  current = argb[0];
147
0
  right = argb[1];
148
0
  if (used_subtract_green) {
149
0
    current = AddGreenToBlueAndRed(current);
150
0
    right = AddGreenToBlueAndRed(right);
151
0
  }
152
  // max_diffs[0] and max_diffs[width - 1] are never used.
153
0
  for (x = 1; x < width - 1; ++x) {
154
0
    up = argb[-stride + x];
155
0
    down = argb[stride + x];
156
0
    left = current;
157
0
    current = right;
158
0
    right = argb[x + 1];
159
0
    if (used_subtract_green) {
160
0
      up = AddGreenToBlueAndRed(up);
161
0
      down = AddGreenToBlueAndRed(down);
162
0
      right = AddGreenToBlueAndRed(right);
163
0
    }
164
0
    max_diffs[x] = MaxDiffAroundPixel(current, up, down, left, right);
165
0
  }
166
0
}
167
168
// Quantize the difference between the actual component value and its prediction
169
// to a multiple of quantization, working modulo 256, taking care not to cross
170
// a boundary (inclusive upper limit).
171
static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict,
172
0
                                     uint8_t boundary, int quantization) {
173
0
  const int residual = (value - predict) & 0xff;
174
0
  const int boundary_residual = (boundary - predict) & 0xff;
175
0
  const int lower = residual & ~(quantization - 1);
176
0
  const int upper = lower + quantization;
177
  // Resolve ties towards a value closer to the prediction (i.e. towards lower
178
  // if value comes after prediction and towards upper otherwise).
179
0
  const int bias = ((boundary - value) & 0xff) < boundary_residual;
180
0
  if (residual - lower < upper - residual + bias) {
181
    // lower is closer to residual than upper.
182
0
    if (residual > boundary_residual && lower <= boundary_residual) {
183
      // Halve quantization step to avoid crossing boundary. This midpoint is
184
      // on the same side of boundary as residual because midpoint >= residual
185
      // (since lower is closer than upper) and residual is above the boundary.
186
0
      return lower + (quantization >> 1);
187
0
    }
188
0
    return lower;
189
0
  } else {
190
    // upper is closer to residual than lower.
191
0
    if (residual <= boundary_residual && upper > boundary_residual) {
192
      // Halve quantization step to avoid crossing boundary. This midpoint is
193
      // on the same side of boundary as residual because midpoint <= residual
194
      // (since upper is closer than lower) and residual is below the boundary.
195
0
      return lower + (quantization >> 1);
196
0
    }
197
0
    return upper & 0xff;
198
0
  }
199
0
}
200
201
0
static WEBP_INLINE uint8_t NearLosslessDiff(uint8_t a, uint8_t b) {
202
0
  return (uint8_t)((((int)(a) - (int)(b))) & 0xff);
203
0
}
204
205
// Quantize every component of the difference between the actual pixel value and
206
// its prediction to a multiple of a quantization (a power of 2, not larger than
207
// max_quantization which is a power of 2, smaller than max_diff). Take care if
208
// value and predict have undergone subtract green, which means that red and
209
// blue are represented as offsets from green.
210
static uint32_t NearLossless(uint32_t value, uint32_t predict,
211
                             int max_quantization, int max_diff,
212
0
                             int used_subtract_green) {
213
0
  int quantization;
214
0
  uint8_t new_green = 0;
215
0
  uint8_t green_diff = 0;
216
0
  uint8_t a, r, g, b;
217
0
  if (max_diff <= 2) {
218
0
    return VP8LSubPixels(value, predict);
219
0
  }
220
0
  quantization = max_quantization;
221
0
  while (quantization >= max_diff) {
222
0
    quantization >>= 1;
223
0
  }
224
0
  if ((value >> 24) == 0 || (value >> 24) == 0xff) {
225
    // Preserve transparency of fully transparent or fully opaque pixels.
226
0
    a = NearLosslessDiff((value >> 24) & 0xff, (predict >> 24) & 0xff);
227
0
  } else {
228
0
    a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);
229
0
  }
230
0
  g = NearLosslessComponent((value >> 8) & 0xff, (predict >> 8) & 0xff, 0xff,
231
0
                            quantization);
232
0
  if (used_subtract_green) {
233
    // The green offset will be added to red and blue components during decoding
234
    // to obtain the actual red and blue values.
235
0
    new_green = ((predict >> 8) + g) & 0xff;
236
    // The amount by which green has been adjusted during quantization. It is
237
    // subtracted from red and blue for compensation, to avoid accumulating two
238
    // quantization errors in them.
239
0
    green_diff = NearLosslessDiff(new_green, (value >> 8) & 0xff);
240
0
  }
241
0
  r = NearLosslessComponent(NearLosslessDiff((value >> 16) & 0xff, green_diff),
242
0
                            (predict >> 16) & 0xff, 0xff - new_green,
243
0
                            quantization);
244
0
  b = NearLosslessComponent(NearLosslessDiff(value & 0xff, green_diff),
245
0
                            predict & 0xff, 0xff - new_green, quantization);
246
0
  return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
247
0
}
248
#endif  // (WEBP_NEAR_LOSSLESS == 1)
249
250
// Stores the difference between the pixel and its prediction in "out".
251
// In case of a lossy encoding, updates the source image to avoid propagating
252
// the deviation further to pixels which depend on the current pixel for their
253
// predictions.
254
static WEBP_INLINE void GetResidual(
255
    int width, int height, uint32_t* const upper_row,
256
    uint32_t* const current_row, const uint8_t* const max_diffs, int mode,
257
    int x_start, int x_end, int y, int max_quantization, int exact,
258
174M
    int used_subtract_green, uint32_t* const out) {
259
174M
  if (exact) {
260
98.9M
    PredictBatch(mode, x_start, y, x_end - x_start, current_row, upper_row,
261
98.9M
                 out);
262
98.9M
  } else {
263
75.6M
    const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
264
75.6M
    int x;
265
2.00G
    for (x = x_start; x < x_end; ++x) {
266
1.93G
      uint32_t predict;
267
1.93G
      uint32_t residual;
268
1.93G
      if (y == 0) {
269
2.94M
        predict = (x == 0) ? ARGB_BLACK : current_row[x - 1];  // Left.
270
1.92G
      } else if (x == 0) {
271
2.54M
        predict = upper_row[x];  // Top.
272
1.92G
      } else {
273
1.92G
        predict = pred_func(&current_row[x - 1], upper_row + x);
274
1.92G
      }
275
1.93G
#if (WEBP_NEAR_LOSSLESS == 1)
276
1.93G
      if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
277
1.93G
          x == 0 || x == width - 1) {
278
1.93G
        residual = VP8LSubPixels(current_row[x], predict);
279
1.93G
      } else {
280
0
        residual = NearLossless(current_row[x], predict, max_quantization,
281
0
                                max_diffs[x], used_subtract_green);
282
        // Update the source image.
283
0
        current_row[x] = VP8LAddPixels(predict, residual);
284
        // x is never 0 here so we do not need to update upper_row like below.
285
0
      }
286
#else
287
      (void)max_diffs;
288
      (void)height;
289
      (void)max_quantization;
290
      (void)used_subtract_green;
291
      residual = VP8LSubPixels(current_row[x], predict);
292
#endif
293
1.93G
      if ((current_row[x] & kMaskAlpha) == 0) {
294
        // If alpha is 0, cleanup RGB. We can choose the RGB values of the
295
        // residual for best compression. The prediction of alpha itself can be
296
        // non-zero and must be kept though. We choose RGB of the residual to be
297
        // 0.
298
58.0M
        residual &= kMaskAlpha;
299
        // Update the source image.
300
58.0M
        current_row[x] = predict & ~kMaskAlpha;
301
        // The prediction for the rightmost pixel in a row uses the leftmost
302
        // pixel
303
        // in that row as its top-right context pixel. Hence if we change the
304
        // leftmost pixel of current_row, the corresponding change must be
305
        // applied
306
        // to upper_row as well where top-right context is being read from.
307
58.0M
        if (x == 0 && y != 0) upper_row[width] = current_row[0];
308
58.0M
      }
309
1.93G
      out[x - x_start] = residual;
310
1.93G
    }
311
75.6M
  }
312
174M
}
313
314
// Accessors to residual histograms.
315
static WEBP_INLINE uint32_t* GetHistoArgb(uint32_t* const all_histos,
316
7.06M
                                          int subsampling_index, int mode) {
317
7.06M
  return &all_histos[(subsampling_index * kNumPredModes + mode) * HISTO_SIZE];
318
7.06M
}
319
320
static WEBP_INLINE const uint32_t* GetHistoArgbConst(
321
7.57M
    const uint32_t* const all_histos, int subsampling_index, int mode) {
322
7.57M
  return &all_histos[subsampling_index * kNumPredModes * HISTO_SIZE +
323
7.57M
                     mode * HISTO_SIZE];
324
7.57M
}
325
326
// Accessors to accumulated residual histogram.
327
static WEBP_INLINE uint32_t* GetAccumulatedHisto(uint32_t* all_accumulated,
328
505k
                                                 int subsampling_index) {
329
505k
  return &all_accumulated[subsampling_index * HISTO_SIZE];
330
505k
}
331
332
// Find and store the best predictor for a tile at subsampling
333
// 'subsampling_index'.
334
static void GetBestPredictorForTile(const uint32_t* const all_argb,
335
                                    int subsampling_index, int tile_x,
336
                                    int tile_y, int tiles_per_row,
337
                                    uint32_t* all_accumulated_argb,
338
                                    uint32_t** const all_modes,
339
504k
                                    uint32_t* const all_pred_histos) {
340
504k
  uint32_t* const accumulated_argb =
341
504k
      GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
342
504k
  uint32_t* const modes = all_modes[subsampling_index];
343
504k
  uint32_t* const pred_histos =
344
504k
      &all_pred_histos[subsampling_index * kNumPredModes];
345
  // Prediction modes of the left and above neighbor tiles.
346
504k
  const int left_mode =
347
504k
      (tile_x > 0) ? (modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff
348
504k
                   : 0xff;
349
504k
  const int above_mode =
350
504k
      (tile_y > 0) ? (modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff
351
504k
                   : 0xff;
352
504k
  int mode;
353
504k
  int64_t best_diff = WEBP_INT64_MAX;
354
504k
  uint32_t best_mode = 0;
355
504k
  const uint32_t* best_histo =
356
504k
      GetHistoArgbConst(all_argb, /*subsampling_index=*/0, best_mode);
357
7.57M
  for (mode = 0; mode < kNumPredModes; ++mode) {
358
7.06M
    const uint32_t* const histo_argb =
359
7.06M
        GetHistoArgbConst(all_argb, subsampling_index, mode);
360
7.06M
    const int64_t cur_diff = PredictionCostSpatialHistogram(
361
7.06M
        accumulated_argb, histo_argb, mode, left_mode, above_mode);
362
363
7.06M
    if (cur_diff < best_diff) {
364
1.56M
      best_histo = histo_argb;
365
1.56M
      best_diff = cur_diff;
366
1.56M
      best_mode = mode;
367
1.56M
    }
368
7.06M
  }
369
  // Update the accumulated histogram.
370
504k
  VP8LAddVectorEq(best_histo, accumulated_argb, HISTO_SIZE);
371
504k
  modes[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (best_mode << 8);
372
504k
  ++pred_histos[best_mode];
373
504k
}
374
375
// Computes the residuals for the different predictors.
376
// If max_quantization > 1, assumes that near lossless processing will be
377
// applied, quantizing residuals to multiples of quantization levels up to
378
// max_quantization (the actual quantization level depends on smoothness near
379
// the given pixel).
380
static void ComputeResidualsForTile(
381
    int width, int height, int tile_x, int tile_y, int min_bits,
382
    uint32_t update_up_to_index, uint32_t* const all_argb,
383
    uint32_t* const argb_scratch, const uint32_t* const argb,
384
504k
    int max_quantization, int exact, int used_subtract_green) {
385
504k
  const int start_x = tile_x << min_bits;
386
504k
  const int start_y = tile_y << min_bits;
387
504k
  const int tile_size = 1 << min_bits;
388
504k
  const int max_y = GetMin(tile_size, height - start_y);
389
504k
  const int max_x = GetMin(tile_size, width - start_x);
390
  // Whether there exist columns just outside the tile.
391
504k
  const int have_left = (start_x > 0);
392
  // Position and size of the strip covering the tile and adjacent columns if
393
  // they exist.
394
504k
  const int context_start_x = start_x - have_left;
395
504k
#if (WEBP_NEAR_LOSSLESS == 1)
396
504k
  const int context_width = max_x + have_left + (max_x < width - start_x);
397
504k
#endif
398
  // The width of upper_row and current_row is one pixel larger than image width
399
  // to allow the top right pixel to point to the leftmost pixel of the next row
400
  // when at the right edge.
401
504k
  uint32_t* upper_row = argb_scratch;
402
504k
  uint32_t* current_row = upper_row + width + 1;
403
504k
  uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1);
404
504k
  int mode;
405
  // Need pointers to be able to swap arrays.
406
504k
  uint32_t residuals[1 << MAX_TRANSFORM_BITS];
407
504k
  assert(max_x <= (1 << MAX_TRANSFORM_BITS));
408
7.57M
  for (mode = 0; mode < kNumPredModes; ++mode) {
409
7.06M
    int relative_y;
410
7.06M
    uint32_t* const histo_argb =
411
7.06M
        GetHistoArgb(all_argb, /*subsampling_index=*/0, mode);
412
7.06M
    if (start_y > 0) {
413
      // Read the row above the tile which will become the first upper_row.
414
      // Include a pixel to the left if it exists; include a pixel to the right
415
      // in all cases (wrapping to the leftmost pixel of the next row if it does
416
      // not exist).
417
6.78M
      memcpy(current_row + context_start_x,
418
6.78M
             argb + (start_y - 1) * width + context_start_x,
419
6.78M
             sizeof(*argb) * (max_x + have_left + 1));
420
6.78M
    }
421
175M
    for (relative_y = 0; relative_y < max_y; ++relative_y) {
422
168M
      const int y = start_y + relative_y;
423
168M
      int relative_x;
424
168M
      uint32_t* tmp = upper_row;
425
168M
      upper_row = current_row;
426
168M
      current_row = tmp;
427
      // Read current_row. Include a pixel to the left if it exists; include a
428
      // pixel to the right in all cases except at the bottom right corner of
429
      // the image (wrapping to the leftmost pixel of the next row if it does
430
      // not exist in the current row).
431
168M
      memcpy(current_row + context_start_x, argb + y * width + context_start_x,
432
168M
             sizeof(*argb) * (max_x + have_left + (y + 1 < height)));
433
168M
#if (WEBP_NEAR_LOSSLESS == 1)
434
168M
      if (max_quantization > 1 && y >= 1 && y + 1 < height) {
435
0
        MaxDiffsForRow(context_width, width, argb + y * width + context_start_x,
436
0
                       max_diffs + context_start_x, used_subtract_green);
437
0
      }
438
168M
#endif
439
440
168M
      GetResidual(width, height, upper_row, current_row, max_diffs, mode,
441
168M
                  start_x, start_x + max_x, y, max_quantization, exact,
442
168M
                  used_subtract_green, residuals);
443
4.97G
      for (relative_x = 0; relative_x < max_x; ++relative_x) {
444
4.80G
        UpdateHisto(histo_argb, residuals[relative_x]);
445
4.80G
      }
446
168M
      if (update_up_to_index > 0) {
447
0
        uint32_t subsampling_index;
448
0
        for (subsampling_index = 1; subsampling_index <= update_up_to_index;
449
0
             ++subsampling_index) {
450
0
          uint32_t* const super_histo =
451
0
              GetHistoArgb(all_argb, subsampling_index, mode);
452
0
          for (relative_x = 0; relative_x < max_x; ++relative_x) {
453
0
            UpdateHisto(super_histo, residuals[relative_x]);
454
0
          }
455
0
        }
456
0
      }
457
168M
    }
458
7.06M
  }
459
504k
}
460
461
// Converts pixels of the image to residuals with respect to predictions.
462
// If max_quantization > 1, applies near lossless processing, quantizing
463
// residuals to multiples of quantization levels up to max_quantization
464
// (the actual quantization level depends on smoothness near the given pixel).
465
static void CopyImageWithPrediction(int width, int height, int bits,
466
                                    const uint32_t* const modes,
467
                                    uint32_t* const argb_scratch,
468
                                    uint32_t* const argb, int low_effort,
469
                                    int max_quantization, int exact,
470
971
                                    int used_subtract_green) {
471
971
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
472
  // The width of upper_row and current_row is one pixel larger than image width
473
  // to allow the top right pixel to point to the leftmost pixel of the next row
474
  // when at the right edge.
475
971
  uint32_t* upper_row = argb_scratch;
476
971
  uint32_t* current_row = upper_row + width + 1;
477
971
  uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1);
478
971
#if (WEBP_NEAR_LOSSLESS == 1)
479
971
  uint8_t* lower_max_diffs = current_max_diffs + width;
480
971
#endif
481
971
  int y;
482
483
549k
  for (y = 0; y < height; ++y) {
484
548k
    int x;
485
548k
    uint32_t* const tmp32 = upper_row;
486
548k
    upper_row = current_row;
487
548k
    current_row = tmp32;
488
548k
    memcpy(current_row, argb + y * width,
489
548k
           sizeof(*argb) * (width + (y + 1 < height)));
490
491
548k
    if (low_effort) {
492
0
      PredictBatch(kPredLowEffort, 0, y, width, current_row, upper_row,
493
0
                   argb + y * width);
494
548k
    } else {
495
548k
#if (WEBP_NEAR_LOSSLESS == 1)
496
548k
      if (max_quantization > 1) {
497
        // Compute max_diffs for the lower row now, because that needs the
498
        // contents of argb for the current row, which we will overwrite with
499
        // residuals before proceeding with the next row.
500
0
        uint8_t* const tmp8 = current_max_diffs;
501
0
        current_max_diffs = lower_max_diffs;
502
0
        lower_max_diffs = tmp8;
503
0
        if (y + 2 < height) {
504
0
          MaxDiffsForRow(width, width, argb + (y + 1) * width, lower_max_diffs,
505
0
                         used_subtract_green);
506
0
        }
507
0
      }
508
548k
#endif
509
6.44M
      for (x = 0; x < width;) {
510
5.89M
        const int mode =
511
5.89M
            (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
512
5.89M
        int x_end = x + (1 << bits);
513
5.89M
        if (x_end > width) x_end = width;
514
5.89M
        GetResidual(width, height, upper_row, current_row, current_max_diffs,
515
5.89M
                    mode, x, x_end, y, max_quantization, exact,
516
5.89M
                    used_subtract_green, argb + y * width + x);
517
5.89M
        x = x_end;
518
5.89M
      }
519
548k
    }
520
548k
  }
521
971
}
522
523
// Checks whether 'image' can be subsampled by finding the biggest power of 2
524
// squares (defined by 'best_bits') of uniform value it is made out of.
525
void VP8LOptimizeSampling(uint32_t* const image, int full_width,
526
                          int full_height, int bits, int max_bits,
527
2.24k
                          int* best_bits_out) {
528
2.24k
  int width = VP8LSubSampleSize(full_width, bits);
529
2.24k
  int height = VP8LSubSampleSize(full_height, bits);
530
2.24k
  int old_width, x, y, square_size;
531
2.24k
  int best_bits = bits;
532
2.24k
  *best_bits_out = bits;
533
  // Check rows first.
534
5.62k
  while (best_bits < max_bits) {
535
4.93k
    const int new_square_size = 1 << (best_bits + 1 - bits);
536
4.93k
    int is_good = 1;
537
4.93k
    square_size = 1 << (best_bits - bits);
538
21.0k
    for (y = 0; y + square_size < height; y += new_square_size) {
539
      // Check the first lines of consecutive line groups.
540
17.7k
      if (memcmp(&image[y * width], &image[(y + square_size) * width],
541
17.7k
                 width * sizeof(*image)) != 0) {
542
1.55k
        is_good = 0;
543
1.55k
        break;
544
1.55k
      }
545
17.7k
    }
546
4.93k
    if (is_good) {
547
3.38k
      ++best_bits;
548
3.38k
    } else {
549
1.55k
      break;
550
1.55k
    }
551
4.93k
  }
552
2.24k
  if (best_bits == bits) return;
553
554
  // Check columns.
555
1.17k
  while (best_bits > bits) {
556
1.08k
    int is_good = 1;
557
1.08k
    square_size = 1 << (best_bits - bits);
558
17.3k
    for (y = 0; is_good && y < height; ++y) {
559
48.7k
      for (x = 0; is_good && x < width; x += square_size) {
560
32.4k
        int i;
561
365k
        for (i = x + 1; i < GetMin(x + square_size, width); ++i) {
562
333k
          if (image[y * width + i] != image[y * width + x]) {
563
328
            is_good = 0;
564
328
            break;
565
328
          }
566
333k
        }
567
32.4k
      }
568
16.2k
    }
569
1.08k
    if (is_good) {
570
759
      break;
571
759
    }
572
328
    --best_bits;
573
328
  }
574
843
  if (best_bits == bits) return;
575
576
  // Subsample the image.
577
759
  old_width = width;
578
759
  square_size = 1 << (best_bits - bits);
579
759
  width = VP8LSubSampleSize(full_width, best_bits);
580
759
  height = VP8LSubSampleSize(full_height, best_bits);
581
2.46k
  for (y = 0; y < height; ++y) {
582
5.21k
    for (x = 0; x < width; ++x) {
583
3.51k
      image[y * width + x] = image[square_size * (y * old_width + x)];
584
3.51k
    }
585
1.70k
  }
586
759
  *best_bits_out = best_bits;
587
759
}
588
589
// Computes the best predictor image.
590
// Finds the best predictors per tile. Once done, finds the best predictor image
591
// sampling.
592
// best_bits is set to 0 in case of error.
593
// The following requires some glossary:
594
// - a tile is a square of side 2^min_bits pixels.
595
// - a super-tile of a tile is a square of side 2^bits pixels with bits in
596
// [min_bits+1, max_bits].
597
// - the max-tile of a tile is the square of 2^max_bits pixels containing it.
598
//   If this max-tile crosses the border of an image, it is cropped.
599
// - tile, super-tiles and max_tile are aligned on powers of 2 in the original
600
//   image.
601
// - coordinates for tile, super-tile, max-tile are respectively named
602
//   tile_x, super_tile_x, max_tile_x at their bit scale.
603
// - in the max-tile, a tile has local coordinates (local_tile_x, local_tile_y).
604
// The tiles are processed in the following zigzag order to complete the
605
// super-tiles as soon as possible:
606
//   1  2|  5  6
607
//   3  4|  7  8
608
// --------------
609
//   9 10| 13 14
610
//  11 12| 15 16
611
// When computing the residuals for a tile, the histogram of the above
612
// super-tile is updated. If this super-tile is finished, its histogram is used
613
// to update the histogram of the next super-tile and so on up to the max-tile.
614
static void GetBestPredictorsAndSubSampling(
615
    int width, int height, const int min_bits, const int max_bits,
616
    uint32_t* const argb_scratch, const uint32_t* const argb,
617
    int max_quantization, int exact, int used_subtract_green,
618
    const WebPPicture* const pic, int percent_range, int* const percent,
619
971
    uint32_t** const all_modes, int* best_bits, uint32_t** best_mode) {
620
971
  const uint32_t tiles_per_row = VP8LSubSampleSize(width, min_bits);
621
971
  const uint32_t tiles_per_col = VP8LSubSampleSize(height, min_bits);
622
971
  int64_t best_cost;
623
971
  uint32_t subsampling_index;
624
971
  const uint32_t max_subsampling_index = max_bits - min_bits;
625
  // Compute the needed memory size for residual histograms, accumulated
626
  // residual histograms and predictor histograms.
627
971
  const int num_argb = (max_subsampling_index + 1) * kNumPredModes * HISTO_SIZE;
628
971
  const int num_accumulated_rgb = (max_subsampling_index + 1) * HISTO_SIZE;
629
971
  const int num_predictors = (max_subsampling_index + 1) * kNumPredModes;
630
971
  uint32_t* const raw_data = (uint32_t*)WebPSafeCalloc(
631
971
      num_argb + num_accumulated_rgb + num_predictors, sizeof(uint32_t));
632
971
  uint32_t* const all_argb = raw_data;
633
971
  uint32_t* const all_accumulated_argb = all_argb + num_argb;
634
971
  uint32_t* const all_pred_histos = all_accumulated_argb + num_accumulated_rgb;
635
971
  const int max_tile_size = 1 << max_subsampling_index;  // in tile size
636
971
  int percent_start = *percent;
637
  // When using the residuals of a tile for its super-tiles, you can either:
638
  // - use each residual to update the histogram of the super-tile, with a cost
639
  //   of 4 * (1<<n)^2 increment operations (4 for the number of channels, and
640
  //   (1<<n)^2 for the number of pixels in the tile)
641
  // - use the histogram of the tile to update the histogram of the super-tile,
642
  //   with a cost of HISTO_SIZE (1024)
643
  // The first method is therefore faster until n==4. 'update_up_to_index'
644
  // defines the maximum subsampling_index for which the residuals should be
645
  // individually added to the super-tile histogram.
646
971
  const uint32_t update_up_to_index =
647
971
      GetMax(GetMin(4, max_bits), min_bits) - min_bits;
648
  // Coordinates in the max-tile in tile units.
649
971
  uint32_t local_tile_x = 0, local_tile_y = 0;
650
971
  uint32_t max_tile_x = 0, max_tile_y = 0;
651
971
  uint32_t tile_x = 0, tile_y = 0;
652
653
971
  *best_bits = 0;
654
971
  *best_mode = NULL;
655
971
  if (raw_data == NULL) return;
656
657
505k
  while (tile_y < tiles_per_col) {
658
504k
    ComputeResidualsForTile(width, height, tile_x, tile_y, min_bits,
659
504k
                            update_up_to_index, all_argb, argb_scratch, argb,
660
504k
                            max_quantization, exact, used_subtract_green);
661
662
    // Update all the super-tiles that are complete.
663
504k
    subsampling_index = 0;
664
504k
    while (1) {
665
504k
      const uint32_t super_tile_x = tile_x >> subsampling_index;
666
504k
      const uint32_t super_tile_y = tile_y >> subsampling_index;
667
504k
      const uint32_t super_tiles_per_row =
668
504k
          VP8LSubSampleSize(width, min_bits + subsampling_index);
669
504k
      GetBestPredictorForTile(all_argb, subsampling_index, super_tile_x,
670
504k
                              super_tile_y, super_tiles_per_row,
671
504k
                              all_accumulated_argb, all_modes, all_pred_histos);
672
504k
      if (subsampling_index == max_subsampling_index) break;
673
674
      // Update the following super-tile histogram if it has not been updated
675
      // yet.
676
0
      ++subsampling_index;
677
0
      if (subsampling_index > update_up_to_index &&
678
0
          subsampling_index <= max_subsampling_index) {
679
0
        VP8LAddVectorEq(
680
0
            GetHistoArgbConst(all_argb, subsampling_index - 1, /*mode=*/0),
681
0
            GetHistoArgb(all_argb, subsampling_index, /*mode=*/0),
682
0
            HISTO_SIZE * kNumPredModes);
683
0
      }
684
      // Check whether the super-tile is not complete (if the smallest tile
685
      // is not at the end of a line/column or at the beginning of a super-tile
686
      // of size (1 << subsampling_index)).
687
0
      if (!((tile_x == (tiles_per_row - 1) ||
688
0
             (local_tile_x + 1) % (1 << subsampling_index) == 0) &&
689
0
            (tile_y == (tiles_per_col - 1) ||
690
0
             (local_tile_y + 1) % (1 << subsampling_index) == 0))) {
691
0
        --subsampling_index;
692
        // subsampling_index now is the index of the last finished super-tile.
693
0
        break;
694
0
      }
695
0
    }
696
    // Reset all the histograms belonging to finished tiles.
697
504k
    memset(all_argb, 0,
698
504k
           HISTO_SIZE * kNumPredModes * (subsampling_index + 1) *
699
504k
               sizeof(*all_argb));
700
701
504k
    if (subsampling_index == max_subsampling_index) {
702
      // If a new max-tile is started.
703
504k
      if (tile_x == (tiles_per_row - 1)) {
704
22.0k
        max_tile_x = 0;
705
22.0k
        ++max_tile_y;
706
482k
      } else {
707
482k
        ++max_tile_x;
708
482k
      }
709
504k
      local_tile_x = 0;
710
504k
      local_tile_y = 0;
711
504k
    } else {
712
      // Proceed with the Z traversal.
713
0
      uint32_t coord_x = local_tile_x >> subsampling_index;
714
0
      uint32_t coord_y = local_tile_y >> subsampling_index;
715
0
      if (tile_x == (tiles_per_row - 1) && coord_x % 2 == 0) {
716
0
        ++coord_y;
717
0
      } else {
718
0
        if (coord_x % 2 == 0) {
719
0
          ++coord_x;
720
0
        } else {
721
          // Z traversal.
722
0
          ++coord_y;
723
0
          --coord_x;
724
0
        }
725
0
      }
726
0
      local_tile_x = coord_x << subsampling_index;
727
0
      local_tile_y = coord_y << subsampling_index;
728
0
    }
729
504k
    tile_x = max_tile_x * max_tile_size + local_tile_x;
730
504k
    tile_y = max_tile_y * max_tile_size + local_tile_y;
731
732
504k
    if (tile_x == 0 &&
733
22.0k
        !WebPReportProgress(
734
22.0k
            pic, percent_start + percent_range * tile_y / tiles_per_col,
735
22.0k
            percent)) {
736
0
      WebPSafeFree(raw_data);
737
0
      return;
738
0
    }
739
504k
  }
740
741
  // Figure out the best sampling.
742
971
  best_cost = WEBP_INT64_MAX;
743
1.94k
  for (subsampling_index = 0; subsampling_index <= max_subsampling_index;
744
971
       ++subsampling_index) {
745
971
    int plane;
746
971
    const uint32_t* const accumulated =
747
971
        GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
748
971
    int64_t cost = VP8LShannonEntropy(
749
971
        &all_pred_histos[subsampling_index * kNumPredModes], kNumPredModes);
750
4.85k
    for (plane = 0; plane < 4; ++plane) {
751
3.88k
      cost += VP8LShannonEntropy(&accumulated[plane * 256], 256);
752
3.88k
    }
753
971
    if (cost < best_cost) {
754
971
      best_cost = cost;
755
971
      *best_bits = min_bits + subsampling_index;
756
971
      *best_mode = all_modes[subsampling_index];
757
971
    }
758
971
  }
759
760
971
  WebPSafeFree(raw_data);
761
762
971
  VP8LOptimizeSampling(*best_mode, width, height, *best_bits,
763
971
                       MAX_TRANSFORM_BITS, best_bits);
764
971
}
765
766
// Finds the best predictor for each tile, and converts the image to residuals
767
// with respect to predictions. If near_lossless_quality < 100, applies
768
// near lossless processing, shaving off more bits of residuals for lower
769
// qualities.
770
int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
771
                      int low_effort, uint32_t* const argb,
772
                      uint32_t* const argb_scratch, uint32_t* const image,
773
                      int near_lossless_quality, int exact,
774
                      int used_subtract_green, const WebPPicture* const pic,
775
                      int percent_range, int* const percent,
776
971
                      int* const best_bits) {
777
971
  int percent_start = *percent;
778
971
  const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
779
971
  if (low_effort) {
780
0
    const int tiles_per_row = VP8LSubSampleSize(width, max_bits);
781
0
    const int tiles_per_col = VP8LSubSampleSize(height, max_bits);
782
0
    int i;
783
0
    for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
784
0
      image[i] = ARGB_BLACK | (kPredLowEffort << 8);
785
0
    }
786
0
    *best_bits = max_bits;
787
971
  } else {
788
    // Allocate data to try all samplings from min_bits to max_bits.
789
971
    int bits;
790
971
    uint32_t sum_num_pixels = 0u;
791
971
    uint32_t *modes_raw, *best_mode;
792
971
    uint32_t* modes[MAX_TRANSFORM_BITS + 1];
793
971
    uint32_t num_pixels[MAX_TRANSFORM_BITS + 1];
794
1.94k
    for (bits = min_bits; bits <= max_bits; ++bits) {
795
971
      const int tiles_per_row = VP8LSubSampleSize(width, bits);
796
971
      const int tiles_per_col = VP8LSubSampleSize(height, bits);
797
971
      num_pixels[bits] = tiles_per_row * tiles_per_col;
798
971
      sum_num_pixels += num_pixels[bits];
799
971
    }
800
971
    modes_raw = (uint32_t*)WebPSafeMalloc(sum_num_pixels, sizeof(*modes_raw));
801
971
    if (modes_raw == NULL) return 0;
802
    // Have modes point to the right global memory modes_raw.
803
971
    modes[min_bits] = modes_raw;
804
971
    for (bits = min_bits + 1; bits <= max_bits; ++bits) {
805
0
      modes[bits] = modes[bits - 1] + num_pixels[bits - 1];
806
0
    }
807
    // Find the best sampling.
808
971
    GetBestPredictorsAndSubSampling(
809
971
        width, height, min_bits, max_bits, argb_scratch, argb, max_quantization,
810
971
        exact, used_subtract_green, pic, percent_range, percent,
811
971
        &modes[min_bits], best_bits, &best_mode);
812
971
    if (*best_bits == 0) {
813
0
      WebPSafeFree(modes_raw);
814
0
      return 0;
815
0
    }
816
    // Keep the best predictor image.
817
971
    memcpy(image, best_mode,
818
971
           VP8LSubSampleSize(width, *best_bits) *
819
971
               VP8LSubSampleSize(height, *best_bits) * sizeof(*image));
820
971
    WebPSafeFree(modes_raw);
821
971
  }
822
823
971
  CopyImageWithPrediction(width, height, *best_bits, image, argb_scratch, argb,
824
971
                          low_effort, max_quantization, exact,
825
971
                          used_subtract_green);
826
971
  return WebPReportProgress(pic, percent_start + percent_range, percent);
827
971
}
828
829
//------------------------------------------------------------------------------
830
// Color transform functions.
831
832
267k
static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
833
267k
  m->green_to_red = 0;
834
267k
  m->green_to_blue = 0;
835
267k
  m->red_to_blue = 0;
836
267k
}
837
838
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
839
257k
                                               VP8LMultipliers* const m) {
840
257k
  m->green_to_red = (color_code >> 0) & 0xff;
841
257k
  m->green_to_blue = (color_code >> 8) & 0xff;
842
257k
  m->red_to_blue = (color_code >> 16) & 0xff;
843
257k
}
844
845
static WEBP_INLINE uint32_t
846
266k
MultipliersToColorCode(const VP8LMultipliers* const m) {
847
266k
  return 0xff000000u | ((uint32_t)(m->red_to_blue) << 16) |
848
266k
         ((uint32_t)(m->green_to_blue) << 8) | m->green_to_red;
849
266k
}
850
851
static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],
852
15.2M
                                        const uint32_t counts[256]) {
853
  // Favor low entropy, locally and globally.
854
  // Favor small absolute values for PredictionCostSpatial
855
15.2M
  static const uint64_t kExpValue = 240;
856
15.2M
  return (int64_t)VP8LCombinedShannonEntropy(counts, accumulated) +
857
15.2M
         PredictionCostBias(counts, 3, kExpValue);
858
15.2M
}
859
860
static int64_t GetPredictionCostCrossColorRed(
861
    const uint32_t* argb, int stride, int tile_width, int tile_height,
862
    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
863
3.46M
    const uint32_t accumulated_red_histo[256]) {
864
3.46M
  uint32_t histo[256] = {0};
865
3.46M
  int64_t cur_diff;
866
867
3.46M
  VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
868
3.46M
                                green_to_red, histo);
869
870
3.46M
  cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
871
3.46M
  if ((uint8_t)green_to_red == prev_x.green_to_red) {
872
    // favor keeping the areas locally similar
873
266k
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
874
266k
  }
875
3.46M
  if ((uint8_t)green_to_red == prev_y.green_to_red) {
876
    // favor keeping the areas locally similar
877
265k
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
878
265k
  }
879
3.46M
  if (green_to_red == 0) {
880
288k
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
881
288k
  }
882
3.46M
  return cur_diff;
883
3.46M
}
884
885
static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
886
                              int tile_height, VP8LMultipliers prev_x,
887
                              VP8LMultipliers prev_y, int quality,
888
                              const uint32_t accumulated_red_histo[256],
889
266k
                              VP8LMultipliers* const best_tx) {
890
266k
  const int kMaxIters = 4 + ((7 * quality) >> 8);  // in range [4..6]
891
266k
  int green_to_red_best = 0;
892
266k
  int iter, offset;
893
266k
  int64_t best_diff = GetPredictionCostCrossColorRed(
894
266k
      argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_red_best,
895
266k
      accumulated_red_histo);
896
1.86M
  for (iter = 0; iter < kMaxIters; ++iter) {
897
    // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
898
    // one in color computation. Having initial delta here as 1 is sufficient
899
    // to explore the range of (-2, 2).
900
1.60M
    const int delta = 32 >> iter;
901
    // Try a negative and a positive delta from the best known value.
902
4.80M
    for (offset = -delta; offset <= delta; offset += 2 * delta) {
903
3.20M
      const int green_to_red_cur = offset + green_to_red_best;
904
3.20M
      const int64_t cur_diff = GetPredictionCostCrossColorRed(
905
3.20M
          argb, stride, tile_width, tile_height, prev_x, prev_y,
906
3.20M
          green_to_red_cur, accumulated_red_histo);
907
3.20M
      if (cur_diff < best_diff) {
908
99.1k
        best_diff = cur_diff;
909
99.1k
        green_to_red_best = green_to_red_cur;
910
99.1k
      }
911
3.20M
    }
912
1.60M
  }
913
266k
  best_tx->green_to_red = (green_to_red_best & 0xff);
914
266k
}
915
916
static int64_t GetPredictionCostCrossColorBlue(
917
    const uint32_t* argb, int stride, int tile_width, int tile_height,
918
    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_blue,
919
11.7M
    int red_to_blue, const uint32_t accumulated_blue_histo[256]) {
920
11.7M
  uint32_t histo[256] = {0};
921
11.7M
  int64_t cur_diff;
922
923
11.7M
  VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
924
11.7M
                                 green_to_blue, red_to_blue, histo);
925
926
11.7M
  cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
927
11.7M
  if ((uint8_t)green_to_blue == prev_x.green_to_blue) {
928
    // favor keeping the areas locally similar
929
2.87M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
930
2.87M
  }
931
11.7M
  if ((uint8_t)green_to_blue == prev_y.green_to_blue) {
932
    // favor keeping the areas locally similar
933
2.87M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
934
2.87M
  }
935
11.7M
  if ((uint8_t)red_to_blue == prev_x.red_to_blue) {
936
    // favor keeping the areas locally similar
937
2.91M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
938
2.91M
  }
939
11.7M
  if ((uint8_t)red_to_blue == prev_y.red_to_blue) {
940
    // favor keeping the areas locally similar
941
2.91M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
942
2.91M
  }
943
11.7M
  if (green_to_blue == 0) {
944
2.92M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
945
2.92M
  }
946
11.7M
  if (red_to_blue == 0) {
947
2.87M
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
948
2.87M
  }
949
11.7M
  return cur_diff;
950
11.7M
}
951
952
12.9M
#define kGreenRedToBlueNumAxis 8
953
266k
#define kGreenRedToBlueMaxIters 7
954
static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
955
                                  int tile_width, int tile_height,
956
                                  VP8LMultipliers prev_x,
957
                                  VP8LMultipliers prev_y, int quality,
958
                                  const uint32_t accumulated_blue_histo[256],
959
266k
                                  VP8LMultipliers* const best_tx) {
960
266k
  const int8_t offset[kGreenRedToBlueNumAxis][2] = {
961
266k
      {0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
962
266k
  const int8_t delta_lut[kGreenRedToBlueMaxIters] = {16, 16, 8, 4, 2, 2, 2};
963
266k
  const int iters = (quality < 25)   ? 1
964
266k
                    : (quality > 50) ? kGreenRedToBlueMaxIters
965
266k
                                     : 4;
966
266k
  int green_to_blue_best = 0;
967
266k
  int red_to_blue_best = 0;
968
266k
  int iter;
969
  // Initial value at origin:
970
266k
  int64_t best_diff = GetPredictionCostCrossColorBlue(
971
266k
      argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_blue_best,
972
266k
      red_to_blue_best, accumulated_blue_histo);
973
1.49M
  for (iter = 0; iter < iters; ++iter) {
974
1.43M
    const int delta = delta_lut[iter];
975
1.43M
    int axis;
976
12.9M
    for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
977
11.5M
      const int green_to_blue_cur =
978
11.5M
          offset[axis][0] * delta + green_to_blue_best;
979
11.5M
      const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
980
11.5M
      const int64_t cur_diff = GetPredictionCostCrossColorBlue(
981
11.5M
          argb, stride, tile_width, tile_height, prev_x, prev_y,
982
11.5M
          green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
983
11.5M
      if (cur_diff < best_diff) {
984
132k
        best_diff = cur_diff;
985
132k
        green_to_blue_best = green_to_blue_cur;
986
132k
        red_to_blue_best = red_to_blue_cur;
987
132k
      }
988
11.5M
      if (quality < 25 && iter == 4) {
989
        // Only axis aligned diffs for lower quality.
990
0
        break;  // next iter.
991
0
      }
992
11.5M
    }
993
1.43M
    if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
994
      // Further iterations would not help.
995
214k
      break;  // out of iter-loop.
996
214k
    }
997
1.43M
  }
998
266k
  best_tx->green_to_blue = green_to_blue_best & 0xff;
999
266k
  best_tx->red_to_blue = red_to_blue_best & 0xff;
1000
266k
}
1001
#undef kGreenRedToBlueMaxIters
1002
#undef kGreenRedToBlueNumAxis
1003
1004
static VP8LMultipliers GetBestColorTransformForTile(
1005
    int tile_x, int tile_y, int bits, VP8LMultipliers prev_x,
1006
    VP8LMultipliers prev_y, int quality, int xsize, int ysize,
1007
    const uint32_t accumulated_red_histo[256],
1008
266k
    const uint32_t accumulated_blue_histo[256], const uint32_t* const argb) {
1009
266k
  const int max_tile_size = 1 << bits;
1010
266k
  const int tile_y_offset = tile_y * max_tile_size;
1011
266k
  const int tile_x_offset = tile_x * max_tile_size;
1012
266k
  const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
1013
266k
  const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
1014
266k
  const int tile_width = all_x_max - tile_x_offset;
1015
266k
  const int tile_height = all_y_max - tile_y_offset;
1016
266k
  const uint32_t* const tile_argb =
1017
266k
      argb + tile_y_offset * xsize + tile_x_offset;
1018
266k
  VP8LMultipliers best_tx;
1019
266k
  MultipliersClear(&best_tx);
1020
1021
266k
  GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height, prev_x, prev_y,
1022
266k
                    quality, accumulated_red_histo, &best_tx);
1023
266k
  GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height, prev_x,
1024
266k
                        prev_y, quality, accumulated_blue_histo, &best_tx);
1025
266k
  return best_tx;
1026
266k
}
1027
1028
static void CopyTileWithColorTransform(int xsize, int ysize, int tile_x,
1029
                                       int tile_y, int max_tile_size,
1030
                                       VP8LMultipliers color_transform,
1031
266k
                                       uint32_t* argb) {
1032
266k
  const int xscan = GetMin(max_tile_size, xsize - tile_x);
1033
266k
  int yscan = GetMin(max_tile_size, ysize - tile_y);
1034
266k
  argb += tile_y * xsize + tile_x;
1035
4.92M
  while (yscan-- > 0) {
1036
4.65M
    VP8LTransformColor(&color_transform, argb, xscan);
1037
4.65M
    argb += xsize;
1038
4.65M
  }
1039
266k
}
1040
1041
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
1042
                            uint32_t* const argb, uint32_t* image,
1043
                            const WebPPicture* const pic, int percent_range,
1044
356
                            int* const percent, int* const best_bits) {
1045
356
  const int max_tile_size = 1 << bits;
1046
356
  const int tile_xsize = VP8LSubSampleSize(width, bits);
1047
356
  const int tile_ysize = VP8LSubSampleSize(height, bits);
1048
356
  int percent_start = *percent;
1049
356
  uint32_t accumulated_red_histo[256] = {0};
1050
356
  uint32_t accumulated_blue_histo[256] = {0};
1051
356
  int tile_x, tile_y;
1052
356
  VP8LMultipliers prev_x, prev_y;
1053
356
  MultipliersClear(&prev_y);
1054
356
  MultipliersClear(&prev_x);
1055
9.79k
  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1056
276k
    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1057
266k
      int y;
1058
266k
      const int tile_x_offset = tile_x * max_tile_size;
1059
266k
      const int tile_y_offset = tile_y * max_tile_size;
1060
266k
      const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
1061
266k
      const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
1062
266k
      const int offset = tile_y * tile_xsize + tile_x;
1063
266k
      if (tile_y != 0) {
1064
257k
        ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
1065
257k
      }
1066
266k
      prev_x = GetBestColorTransformForTile(
1067
266k
          tile_x, tile_y, bits, prev_x, prev_y, quality, width, height,
1068
266k
          accumulated_red_histo, accumulated_blue_histo, argb);
1069
266k
      image[offset] = MultipliersToColorCode(&prev_x);
1070
266k
      CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
1071
266k
                                 max_tile_size, prev_x, argb);
1072
1073
      // Gather accumulated histogram data.
1074
4.92M
      for (y = tile_y_offset; y < all_y_max; ++y) {
1075
4.65M
        int ix = y * width + tile_x_offset;
1076
4.65M
        const int ix_end = ix + all_x_max - tile_x_offset;
1077
117M
        for (; ix < ix_end; ++ix) {
1078
113M
          const uint32_t pix = argb[ix];
1079
113M
          if (ix >= 2 && pix == argb[ix - 2] && pix == argb[ix - 1]) {
1080
106M
            continue;  // repeated pixels are handled by backward references
1081
106M
          }
1082
6.12M
          if (ix >= width + 2 && argb[ix - 2] == argb[ix - width - 2] &&
1083
3.35M
              argb[ix - 1] == argb[ix - width - 1] && pix == argb[ix - width]) {
1084
1.36M
            continue;  // repeated pixels are handled by backward references
1085
1.36M
          }
1086
4.75M
          ++accumulated_red_histo[(pix >> 16) & 0xff];
1087
4.75M
          ++accumulated_blue_histo[(pix >> 0) & 0xff];
1088
4.75M
        }
1089
4.65M
      }
1090
266k
    }
1091
9.43k
    if (!WebPReportProgress(pic,
1092
9.43k
                            percent_start + percent_range * tile_y / tile_ysize,
1093
9.43k
                            percent)) {
1094
0
      return 0;
1095
0
    }
1096
9.43k
  }
1097
356
  VP8LOptimizeSampling(image, width, height, bits, MAX_TRANSFORM_BITS,
1098
356
                       best_bits);
1099
356
  return 1;
1100
356
}