Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwebp/src/enc/quant_enc.c
Line
Count
Source
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
//   Quantization
11
//
12
// Author: Skal (pascal.massimino@gmail.com)
13
14
#include <assert.h>
15
#include <math.h>
16
#include <stdlib.h>  // for abs()
17
#include <string.h>
18
19
#include "src/dec/common_dec.h"
20
#include "src/dsp/dsp.h"
21
#include "src/dsp/quant.h"
22
#include "src/enc/cost_enc.h"
23
#include "src/enc/vp8i_enc.h"
24
#include "src/webp/types.h"
25
26
972M
#define DO_TRELLIS_I4 1
27
38.0M
#define DO_TRELLIS_I16 1  // not a huge gain, but ok at low bitrate.
28
38.0M
#define DO_TRELLIS_UV 0   // disable trellis for UV. Risky. Not worth.
29
#define USE_TDISTO 1
30
31
4.47k
#define MID_ALPHA 64   // neutral value for susceptibility
32
4.47k
#define MIN_ALPHA 30   // lowest usable value for susceptibility
33
4.47k
#define MAX_ALPHA 100  // higher meaningful value for susceptibility
34
35
#define SNS_TO_DQ \
36
4.47k
  0.9  // Scaling constant between the sns value and the QP
37
       // power-law modulation. Must be strictly less than 1.
38
39
// number of non-zero coeffs below which we consider the block very flat
40
// (and apply a penalty to complex predictions)
41
6.05M
#define FLATNESS_LIMIT_I16 0  // I16 mode (special case)
42
437M
#define FLATNESS_LIMIT_I4 3   // I4 mode
43
14.2M
#define FLATNESS_LIMIT_UV 2   // UV mode
44
305M
#define FLATNESS_PENALTY 140  // roughly ~1bit per block
45
46
505M
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
47
48
808M
#define RD_DISTO_MULT 256  // distortion multiplier (equivalent of lambda)
49
50
// #define DEBUG_BLOCK
51
52
//------------------------------------------------------------------------------
53
54
#if defined(DEBUG_BLOCK)
55
56
#include <stdio.h>
57
#include <stdlib.h>
58
59
static void PrintBlockInfo(const VP8EncIterator* const it,
60
                           const VP8ModeScore* const rd) {
61
  int i, j;
62
  const int is_i16 = (it->mb->type == 1);
63
  const uint8_t* const y_in = it->yuv_in + Y_OFF_ENC;
64
  const uint8_t* const y_out = it->yuv_out + Y_OFF_ENC;
65
  const uint8_t* const uv_in = it->yuv_in + U_OFF_ENC;
66
  const uint8_t* const uv_out = it->yuv_out + U_OFF_ENC;
67
  printf("SOURCE / OUTPUT / ABS DELTA\n");
68
  for (j = 0; j < 16; ++j) {
69
    for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
70
    printf("     ");
71
    for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
72
    printf("     ");
73
    for (i = 0; i < 16; ++i) {
74
      printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
75
    }
76
    printf("\n");
77
  }
78
  printf("\n");  // newline before the U/V block
79
  for (j = 0; j < 8; ++j) {
80
    for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
81
    printf(" ");
82
    for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
83
    printf("    ");
84
    for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
85
    printf(" ");
86
    for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
87
    printf("   ");
88
    for (i = 0; i < 8; ++i) {
89
      printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
90
    }
91
    printf(" ");
92
    for (i = 8; i < 16; ++i) {
93
      printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
94
    }
95
    printf("\n");
96
  }
97
  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", (int)rd->D, (int)rd->SD,
98
         (int)rd->R, (int)rd->H, (int)rd->nz, (int)rd->score);
99
  if (is_i16) {
100
    printf("Mode: %d\n", rd->mode_i16);
101
    printf("y_dc_levels:");
102
    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
103
    printf("\n");
104
  } else {
105
    printf("Modes[16]: ");
106
    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
107
    printf("\n");
108
  }
109
  printf("y_ac_levels:\n");
110
  for (j = 0; j < 16; ++j) {
111
    for (i = is_i16 ? 1 : 0; i < 16; ++i) {
112
      printf("%4d ", rd->y_ac_levels[j][i]);
113
    }
114
    printf("\n");
115
  }
116
  printf("\n");
117
  printf("uv_levels (mode=%d):\n", rd->mode_uv);
118
  for (j = 0; j < 8; ++j) {
119
    for (i = 0; i < 16; ++i) {
120
      printf("%4d ", rd->uv_levels[j][i]);
121
    }
122
    printf("\n");
123
  }
124
}
125
126
#endif  // DEBUG_BLOCK
127
128
//------------------------------------------------------------------------------
129
130
144k
static WEBP_INLINE int clip(int v, int m, int M) {
131
144k
  return v < m ? m : v > M ? M : v;
132
144k
}
133
134
static const uint8_t kZigzag[16] = {0, 1,  4,  8,  5, 2,  3,  6,
135
                                    9, 12, 13, 10, 7, 11, 14, 15};
136
137
static const uint8_t kDcTable[128] = {
138
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,
139
    17,  18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,
140
    27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,
141
    41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,
142
    55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
143
    70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,  82,  83,
144
    84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104,
145
    106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
146
    138, 140, 143, 145, 148, 151, 154, 157};
147
148
static const uint16_t kAcTable[128] = {
149
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,
150
    19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,
151
    34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
152
    49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,
153
    70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,
154
    100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
155
    137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
156
    185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
157
    249, 254, 259, 264, 269, 274, 279, 284};
158
159
static const uint16_t kAcTable2[128] = {
160
    8,   8,   9,   10,  12,  13,  15,  17,  18,  20,  21,  23,  24,  26,  27,
161
    29,  31,  32,  34,  35,  37,  38,  40,  41,  43,  44,  46,  48,  49,  51,
162
    52,  54,  55,  57,  58,  60,  62,  63,  65,  66,  68,  69,  71,  72,  74,
163
    75,  77,  79,  80,  82,  83,  85,  86,  88,  89,  93,  96,  99,  102, 105,
164
    108, 111, 114, 117, 120, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151,
165
    155, 158, 161, 164, 167, 170, 173, 176, 179, 184, 189, 193, 198, 203, 207,
166
    212, 217, 221, 226, 230, 235, 240, 244, 249, 254, 258, 263, 268, 274, 280,
167
    286, 292, 299, 305, 311, 317, 323, 330, 336, 342, 348, 354, 362, 370, 379,
168
    385, 393, 401, 409, 416, 424, 432, 440};
169
170
static const uint8_t kBiasMatrices[3][2] = {  // [luma-ac,luma-dc,chroma][dc,ac]
171
    {96, 110},
172
    {96, 108},
173
    {110, 115}};
174
175
// Sharpening by (slightly) raising the hi-frequency coeffs.
176
// Hack-ish but helpful for mid-bitrate range. Use with care.
177
265k
#define SHARPEN_BITS 11  // number of descaling bits for sharpening bias
178
static const uint8_t kFreqSharpening[16] = {0,  30, 60, 90, 30, 60, 90, 90,
179
                                            60, 90, 90, 90, 90, 90, 90, 90};
180
181
//------------------------------------------------------------------------------
182
// Initialize quantization parameters in VP8Matrix
183
184
// Returns the average quantizer
185
49.7k
static int ExpandMatrix(VP8Matrix* const m, int type) {
186
49.7k
  int i, sum;
187
149k
  for (i = 0; i < 2; ++i) {
188
99.4k
    const int is_ac_coeff = (i > 0);
189
99.4k
    const int bias = kBiasMatrices[type][is_ac_coeff];
190
99.4k
    m->iq[i] = (1 << QFIX) / m->q[i];
191
99.4k
    m->bias[i] = BIAS(bias);
192
    // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is:
193
    //   * zero if coeff <= zthresh
194
    //   * non-zero if coeff > zthresh
195
99.4k
    m->zthresh[i] = ((1 << QFIX) - 1 - m->bias[i]) / m->iq[i];
196
99.4k
  }
197
746k
  for (i = 2; i < 16; ++i) {
198
696k
    m->q[i] = m->q[1];
199
696k
    m->iq[i] = m->iq[1];
200
696k
    m->bias[i] = m->bias[1];
201
696k
    m->zthresh[i] = m->zthresh[1];
202
696k
  }
203
845k
  for (sum = 0, i = 0; i < 16; ++i) {
204
795k
    if (type == 0) {  // we only use sharpening for AC luma coeffs
205
265k
      m->sharpen[i] = (kFreqSharpening[i] * m->q[i]) >> SHARPEN_BITS;
206
530k
    } else {
207
530k
      m->sharpen[i] = 0;
208
530k
    }
209
795k
    sum += m->q[i];
210
795k
  }
211
49.7k
  return (sum + 8) >> 4;
212
49.7k
}
213
214
132k
static void CheckLambdaValue(int* const v) {
215
132k
  if (*v < 1) *v = 1;
216
132k
}
217
218
4.47k
static void SetupMatrices(VP8Encoder* enc) {
219
4.47k
  int i;
220
4.47k
  const int tlambda_scale = (enc->method >= 4) ? enc->config->sns_strength : 0;
221
4.47k
  const int num_segments = enc->segment_hdr.num_segments;
222
21.0k
  for (i = 0; i < num_segments; ++i) {
223
16.5k
    VP8SegmentInfo* const m = &enc->dqm[i];
224
16.5k
    const int q = m->quant;
225
16.5k
    int q_i4, q_i16, q_uv;
226
16.5k
    m->y1.q[0] = kDcTable[clip(q + enc->dq_y1_dc, 0, 127)];
227
16.5k
    m->y1.q[1] = kAcTable[clip(q, 0, 127)];
228
229
16.5k
    m->y2.q[0] = kDcTable[clip(q + enc->dq_y2_dc, 0, 127)] * 2;
230
16.5k
    m->y2.q[1] = kAcTable2[clip(q + enc->dq_y2_ac, 0, 127)];
231
232
16.5k
    m->uv.q[0] = kDcTable[clip(q + enc->dq_uv_dc, 0, 117)];
233
16.5k
    m->uv.q[1] = kAcTable[clip(q + enc->dq_uv_ac, 0, 127)];
234
235
16.5k
    q_i4 = ExpandMatrix(&m->y1, 0);
236
16.5k
    q_i16 = ExpandMatrix(&m->y2, 1);
237
16.5k
    q_uv = ExpandMatrix(&m->uv, 2);
238
239
16.5k
    m->lambda_i4 = (3 * q_i4 * q_i4) >> 7;
240
16.5k
    m->lambda_i16 = (3 * q_i16 * q_i16);
241
16.5k
    m->lambda_uv = (3 * q_uv * q_uv) >> 6;
242
16.5k
    m->lambda_mode = (1 * q_i4 * q_i4) >> 7;
243
16.5k
    m->lambda_trellis_i4 = (7 * q_i4 * q_i4) >> 3;
244
16.5k
    m->lambda_trellis_i16 = (q_i16 * q_i16) >> 2;
245
16.5k
    m->lambda_trellis_uv = (q_uv * q_uv) << 1;
246
16.5k
    m->tlambda = (tlambda_scale * q_i4) >> 5;
247
248
    // none of these constants should be < 1
249
16.5k
    CheckLambdaValue(&m->lambda_i4);
250
16.5k
    CheckLambdaValue(&m->lambda_i16);
251
16.5k
    CheckLambdaValue(&m->lambda_uv);
252
16.5k
    CheckLambdaValue(&m->lambda_mode);
253
16.5k
    CheckLambdaValue(&m->lambda_trellis_i4);
254
16.5k
    CheckLambdaValue(&m->lambda_trellis_i16);
255
16.5k
    CheckLambdaValue(&m->lambda_trellis_uv);
256
16.5k
    CheckLambdaValue(&m->tlambda);
257
258
16.5k
    m->min_disto = 20 * m->y1.q[0];  // quantization-aware min disto
259
16.5k
    m->max_edge = 0;
260
261
16.5k
    m->i4_penalty = 1000 * q_i4 * q_i4;
262
16.5k
  }
263
4.47k
}
264
265
//------------------------------------------------------------------------------
266
// Initialize filtering parameters
267
268
// Very small filter-strength values have close to no visual effect. So we can
269
// save a little decoding-CPU by turning filtering off for these.
270
17.8k
#define FSTRENGTH_CUTOFF 2
271
272
4.47k
static void SetupFilterStrength(VP8Encoder* const enc) {
273
4.47k
  int i;
274
  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
275
4.47k
  const int level0 = 5 * enc->config->filter_strength;
276
22.3k
  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
277
17.8k
    VP8SegmentInfo* const m = &enc->dqm[i];
278
    // We focus on the quantization of AC coeffs.
279
17.8k
    const int qstep = kAcTable[clip(m->quant, 0, 127)] >> 2;
280
17.8k
    const int base_strength =
281
17.8k
        VP8FilterStrengthFromDelta(enc->filter_hdr.sharpness, qstep);
282
    // Segments with lower complexity ('beta') will be less filtered.
283
17.8k
    const int f = base_strength * level0 / (256 + m->beta);
284
17.8k
    m->fstrength = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
285
17.8k
  }
286
  // We record the initial strength (mainly for the case of 1-segment only).
287
4.47k
  enc->filter_hdr.level = enc->dqm[0].fstrength;
288
4.47k
  enc->filter_hdr.simple = (enc->config->filter_type == 0);
289
4.47k
  enc->filter_hdr.sharpness = enc->config->filter_sharpness;
290
4.47k
}
291
292
//------------------------------------------------------------------------------
293
294
// Note: if you change the values below, remember that the max range
295
// allowed by the syntax for DQ_UV is [-16,16].
296
8.94k
#define MAX_DQ_UV (6)
297
8.94k
#define MIN_DQ_UV (-4)
298
299
// We want to emulate jpeg-like behaviour where the expected "good" quality
300
// is around q=75. Internally, our "good" middle is around c=50. So we
301
// map accordingly using linear piece-wise function
302
4.47k
static double QualityToCompression(double c) {
303
4.47k
  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
304
  // The file size roughly scales as pow(quantizer, 3.). Actually, the
305
  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
306
  // in the mid-quant range. So we scale the compressibility inversely to
307
  // this power-law: quant ~= compression ^ 1/3. This law holds well for
308
  // low quant. Finer modeling for high-quant would make use of kAcTable[]
309
  // more explicitly.
310
4.47k
  const double v = pow(linear_c, 1 / 3.);
311
4.47k
  return v;
312
4.47k
}
313
314
0
static double QualityToJPEGCompression(double c, double alpha) {
315
  // We map the complexity 'alpha' and quality setting 'c' to a compression
316
  // exponent empirically matched to the compression curve of libjpeg6b.
317
  // On average, the WebP output size will be roughly similar to that of a
318
  // JPEG file compressed with same quality factor.
319
0
  const double amin = 0.30;
320
0
  const double amax = 0.85;
321
0
  const double exp_min = 0.4;
322
0
  const double exp_max = 0.9;
323
0
  const double slope = (exp_min - exp_max) / (amax - amin);
324
  // Linearly interpolate 'expn' from exp_min to exp_max
325
  // in the [amin, amax] range.
326
0
  const double expn = (alpha > amax)   ? exp_min
327
0
                      : (alpha < amin) ? exp_max
328
0
                                       : exp_max + slope * (alpha - amin);
329
0
  const double v = pow(c, expn);
330
0
  return v;
331
0
}
332
333
static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
334
25.6k
                                 const VP8SegmentInfo* const S2) {
335
25.6k
  return (S1->quant == S2->quant) && (S1->fstrength == S2->fstrength);
336
25.6k
}
337
338
4.47k
static void SimplifySegments(VP8Encoder* const enc) {
339
4.47k
  int map[NUM_MB_SEGMENTS] = {0, 1, 2, 3};
340
  // 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
341
  // explicit check is needed to avoid a spurious warning about 'i' exceeding
342
  // array bounds of 'dqm' with some compilers (noticed with gcc-4.9).
343
4.47k
  const int num_segments = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS)
344
4.47k
                               ? enc->segment_hdr.num_segments
345
4.47k
                               : NUM_MB_SEGMENTS;
346
4.47k
  int num_final_segments = 1;
347
4.47k
  int s1, s2;
348
17.8k
  for (s1 = 1; s1 < num_segments; ++s1) {  // find similar segments
349
13.4k
    const VP8SegmentInfo* const S1 = &enc->dqm[s1];
350
13.4k
    int found = 0;
351
    // check if we already have similar segment
352
37.7k
    for (s2 = 0; s2 < num_final_segments; ++s2) {
353
25.6k
      const VP8SegmentInfo* const S2 = &enc->dqm[s2];
354
25.6k
      if (SegmentsAreEquivalent(S1, S2)) {
355
1.29k
        found = 1;
356
1.29k
        break;
357
1.29k
      }
358
25.6k
    }
359
13.4k
    map[s1] = s2;
360
13.4k
    if (!found) {
361
12.1k
      if (num_final_segments != s1) {
362
19
        enc->dqm[num_final_segments] = enc->dqm[s1];
363
19
      }
364
12.1k
      ++num_final_segments;
365
12.1k
    }
366
13.4k
  }
367
4.47k
  if (num_final_segments < num_segments) {  // Remap
368
536
    int i = enc->mb_w * enc->mb_h;
369
396k
    while (i-- > 0) enc->mb_info[i].segment = map[enc->mb_info[i].segment];
370
536
    enc->segment_hdr.num_segments = num_final_segments;
371
    // Replicate the trailing segment infos (it's mostly cosmetics)
372
1.83k
    for (i = num_final_segments; i < num_segments; ++i) {
373
1.29k
      enc->dqm[i] = enc->dqm[num_final_segments - 1];
374
1.29k
    }
375
536
  }
376
4.47k
}
377
378
4.47k
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
379
4.47k
  int i;
380
4.47k
  int dq_uv_ac, dq_uv_dc;
381
4.47k
  const int num_segments = enc->segment_hdr.num_segments;
382
4.47k
  const double amp = SNS_TO_DQ * enc->config->sns_strength / 100. / 128.;
383
4.47k
  const double Q = quality / 100.;
384
4.47k
  const double c_base = enc->config->emulate_jpeg_size
385
4.47k
                            ? QualityToJPEGCompression(Q, enc->alpha / 255.)
386
4.47k
                            : QualityToCompression(Q);
387
22.3k
  for (i = 0; i < num_segments; ++i) {
388
    // We modulate the base coefficient to accommodate for the quantization
389
    // susceptibility and allow denser segments to be quantized more.
390
17.8k
    const double expn = 1. - amp * enc->dqm[i].alpha;
391
17.8k
    const double c = pow(c_base, expn);
392
17.8k
    const int q = (int)(127. * (1. - c));
393
17.8k
    assert(expn > 0.);
394
17.8k
    enc->dqm[i].quant = clip(q, 0, 127);
395
17.8k
  }
396
397
  // purely indicative in the bitstream (except for the 1-segment case)
398
4.47k
  enc->base_quant = enc->dqm[0].quant;
399
400
  // fill-in values for the unused segments (required by the syntax)
401
4.47k
  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
402
0
    enc->dqm[i].quant = enc->base_quant;
403
0
  }
404
405
  // uv_alpha is normally spread around ~60. The useful range is
406
  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
407
  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
408
4.47k
  dq_uv_ac = (enc->uv_alpha - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV) /
409
4.47k
             (MAX_ALPHA - MIN_ALPHA);
410
  // we rescale by the user-defined strength of adaptation
411
4.47k
  dq_uv_ac = dq_uv_ac * enc->config->sns_strength / 100;
412
  // and make it safe.
413
4.47k
  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
414
  // We also boost the dc-uv-quant a little, based on sns-strength, since
415
  // U/V channels are quite more reactive to high quants (flat DC-blocks
416
  // tend to appear, and are unpleasant).
417
4.47k
  dq_uv_dc = -4 * enc->config->sns_strength / 100;
418
4.47k
  dq_uv_dc = clip(dq_uv_dc, -15, 15);  // 4bit-signed max allowed
419
420
4.47k
  enc->dq_y1_dc = 0;  // TODO(skal): dq-lum
421
4.47k
  enc->dq_y2_dc = 0;
422
4.47k
  enc->dq_y2_ac = 0;
423
4.47k
  enc->dq_uv_dc = dq_uv_dc;
424
4.47k
  enc->dq_uv_ac = dq_uv_ac;
425
426
4.47k
  SetupFilterStrength(enc);  // initialize segments' filtering, eventually
427
428
4.47k
  if (num_segments > 1) SimplifySegments(enc);
429
430
4.47k
  SetupMatrices(enc);  // finalize quantization matrices
431
4.47k
}
432
433
//------------------------------------------------------------------------------
434
// Form the predictions in cache
435
436
// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
437
const uint16_t VP8I16ModeOffsets[4] = {I16DC16, I16TM16, I16VE16, I16HE16};
438
const uint16_t VP8UVModeOffsets[4] = {C8DC8, C8TM8, C8VE8, C8HE8};
439
440
// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
441
static const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
442
    I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4};
443
444
9.51M
void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
445
9.51M
  const uint8_t* const left = it->x ? it->y_left : NULL;
446
9.51M
  const uint8_t* const top = it->y ? it->y_top : NULL;
447
9.51M
  VP8EncPredLuma16(it->yuv_p, left, top);
448
9.51M
}
449
450
9.51M
void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
451
9.51M
  const uint8_t* const left = it->x ? it->u_left : NULL;
452
9.51M
  const uint8_t* const top = it->y ? it->uv_top : NULL;
453
9.51M
  VP8EncPredChroma8(it->yuv_p, left, top);
454
9.51M
}
455
456
// Form all the ten Intra4x4 predictions in the 'yuv_p' cache
457
// for the 4x4 block it->i4
458
48.6M
static void MakeIntra4Preds(const VP8EncIterator* const it) {
459
48.6M
  VP8EncPredLuma4(it->yuv_p, it->i4_top);
460
48.6M
}
461
462
//------------------------------------------------------------------------------
463
// Quantize
464
465
// Layout:
466
// +----+----+
467
// |YYYY|UUVV| 0
468
// |YYYY|UUVV| 4
469
// |YYYY|....| 8
470
// |YYYY|....| 12
471
// +----+----+
472
473
const uint16_t VP8Scan[16] = {
474
    // Luma
475
    0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS,
476
    0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
477
    0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS,  12 + 8 * BPS,
478
    0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
479
};
480
481
static const uint16_t VP8ScanUV[4 + 4] = {
482
    0 + 0 * BPS, 4 + 0 * BPS,  0 + 4 * BPS, 4 + 4 * BPS,  // U
483
    8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS  // V
484
};
485
486
//------------------------------------------------------------------------------
487
// Distortion measurement
488
489
static const uint16_t kWeightY[16] = {38, 32, 20, 9, 32, 28, 17, 7,
490
                                      20, 17, 10, 4, 9,  7,  4,  2};
491
492
static const uint16_t kWeightTrellis[16] = {
493
#if USE_TDISTO == 0
494
    16, 16, 16, 16, 16, 16, 16, 16, 16,
495
    16, 16, 16, 16, 16, 16, 16
496
#else
497
    30, 27, 19, 11, 27, 24, 17, 10, 19,
498
    17, 12, 8,  11, 10, 8,  6
499
#endif
500
};
501
502
// Init/Copy the common fields in score.
503
62.9M
static void InitScore(VP8ModeScore* const rd) {
504
62.9M
  rd->D = 0;
505
62.9M
  rd->SD = 0;
506
62.9M
  rd->R = 0;
507
62.9M
  rd->H = 0;
508
62.9M
  rd->nz = 0;
509
62.9M
  rd->score = MAX_COST;
510
62.9M
}
511
512
static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst,
513
91.9M
                      const VP8ModeScore* WEBP_RESTRICT const src) {
514
91.9M
  dst->D = src->D;
515
91.9M
  dst->SD = src->SD;
516
91.9M
  dst->R = src->R;
517
91.9M
  dst->H = src->H;
518
91.9M
  dst->nz = src->nz;  // note that nz is not accumulated, but just copied.
519
91.9M
  dst->score = src->score;
520
91.9M
}
521
522
static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
523
53.4M
                     const VP8ModeScore* WEBP_RESTRICT const src) {
524
53.4M
  dst->D += src->D;
525
53.4M
  dst->SD += src->SD;
526
53.4M
  dst->R += src->R;
527
53.4M
  dst->H += src->H;
528
53.4M
  dst->nz |= src->nz;  // here, new nz bits are accumulated.
529
53.4M
  dst->score += src->score;
530
53.4M
}
531
532
//------------------------------------------------------------------------------
533
// Performs trellis-optimized quantization.
534
535
// Trellis node
536
typedef struct {
537
  int8_t prev;    // best previous node
538
  int8_t sign;    // sign of coeff_i
539
  int16_t level;  // level
540
} Node;
541
542
// Score state
543
typedef struct {
544
  score_t score;          // partial RD score
545
  const uint16_t* costs;  // shortcut to cost tables
546
} ScoreState;
547
548
// If a coefficient was quantized to a value Q (using a neutral bias),
549
// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
550
// We don't test negative values though.
551
0
#define MIN_DELTA 0  // how much lower level to try
552
0
#define MAX_DELTA 1  // how much higher
553
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
554
0
#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
555
0
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
556
557
808M
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
558
808M
  rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
559
808M
}
560
561
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
562
0
                                          score_t distortion) {
563
0
  return rate * lambda + RD_DISTO_MULT * distortion;
564
0
}
565
566
// Coefficient type.
567
enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
568
569
static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
570
                                int16_t in[16], int16_t out[16], int ctx0,
571
                                int coeff_type,
572
                                const VP8Matrix* WEBP_RESTRICT const mtx,
573
0
                                int lambda) {
574
0
  const ProbaArray* const probas = enc->proba.coeffs[coeff_type];
575
0
  CostArrayPtr const costs =
576
0
      (CostArrayPtr)enc->proba.remapped_costs[coeff_type];
577
0
  const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
578
0
  Node nodes[16][NUM_NODES];
579
0
  ScoreState score_states[2][NUM_NODES];
580
0
  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
581
0
  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
582
0
  int best_path[3] = {-1, -1, -1};  // store best-last/best-level/best-previous
583
0
  score_t best_score;
584
0
  int n, m, p, last;
585
586
0
  {
587
0
    score_t cost;
588
0
    const int thresh = mtx->q[1] * mtx->q[1] / 4;
589
0
    const int last_proba = probas[VP8EncBands[first]][ctx0][0];
590
591
    // compute the position of the last interesting coefficient
592
0
    last = first - 1;
593
0
    for (n = 15; n >= first; --n) {
594
0
      const int j = kZigzag[n];
595
0
      const int err = in[j] * in[j];
596
0
      if (err > thresh) {
597
0
        last = n;
598
0
        break;
599
0
      }
600
0
    }
601
    // we don't need to go inspect up to n = 16 coeffs. We can just go up
602
    // to last + 1 (inclusive) without losing much.
603
0
    if (last < 15) ++last;
604
605
    // compute 'skip' score. This is the max score one can do.
606
0
    cost = VP8BitCost(0, last_proba);
607
0
    best_score = RDScoreTrellis(lambda, cost, 0);
608
609
    // initialize source node.
610
0
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
611
0
      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
612
0
      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
613
0
      ss_cur[m].costs = costs[first][ctx0];
614
0
    }
615
0
  }
616
617
  // traverse trellis.
618
0
  for (n = first; n <= last; ++n) {
619
0
    const int j = kZigzag[n];
620
0
    const uint32_t Q = mtx->q[j];
621
0
    const uint32_t iQ = mtx->iq[j];
622
0
    const uint32_t B = BIAS(0x00);  // neutral bias
623
    // note: it's important to take sign of the _original_ coeff,
624
    // so we don't have to consider level < 0 afterward.
625
0
    const int sign = (in[j] < 0);
626
0
    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen[j];
627
0
    int level0 = QUANTDIV(coeff0, iQ, B);
628
0
    int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80));
629
0
    if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL;
630
0
    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
631
632
0
    {  // Swap current and previous score states
633
0
      ScoreState* const tmp = ss_cur;
634
0
      ss_cur = ss_prev;
635
0
      ss_prev = tmp;
636
0
    }
637
638
    // test all alternate level values around level0.
639
0
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
640
0
      Node* const cur = &NODE(n, m);
641
0
      const int level = level0 + m;
642
0
      const int ctx = (level > 2) ? 2 : level;
643
0
      const int band = VP8EncBands[n + 1];
644
0
      score_t base_score;
645
0
      score_t best_cur_score;
646
0
      int best_prev;
647
0
      score_t cost, score;
648
649
      // costs is [16][NUM_CTX == 3] but ss_cur[m].costs is only read after
650
      // being swapped with ss_prev: the last value can be NULL.
651
0
      if (n + 1 < 16) {
652
0
        ss_cur[m].costs = costs[n + 1][ctx];
653
0
      } else {
654
0
        ss_cur[m].costs = NULL;
655
0
      }
656
0
      if (level < 0 || level > thresh_level) {
657
0
        ss_cur[m].score = MAX_COST;
658
        // Node is dead.
659
0
        continue;
660
0
      }
661
662
0
      {
663
        // Compute delta_error = how much coding this level will
664
        // subtract to max_error as distortion.
665
        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
666
0
        const int new_error = coeff0 - level * Q;
667
0
        const int delta_error =
668
0
            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
669
0
        base_score = RDScoreTrellis(lambda, 0, delta_error);
670
0
      }
671
672
      // Inspect all possible non-dead predecessors. Retain only the best one.
673
      // The base_score is added to all scores so it is only added for the final
674
      // value after the loop.
675
0
      cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
676
0
      best_cur_score =
677
0
          ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
678
0
      best_prev = -MIN_DELTA;
679
0
      for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
680
        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
681
        // eliminated since their score can't be better than the current best.
682
0
        cost = VP8LevelCost(ss_prev[p].costs, level);
683
        // Examine node assuming it's a non-terminal one.
684
0
        score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
685
0
        if (score < best_cur_score) {
686
0
          best_cur_score = score;
687
0
          best_prev = p;
688
0
        }
689
0
      }
690
0
      best_cur_score += base_score;
691
      // Store best finding in current node.
692
0
      cur->sign = sign;
693
0
      cur->level = level;
694
0
      cur->prev = best_prev;
695
0
      ss_cur[m].score = best_cur_score;
696
697
      // Now, record best terminal node (and thus best entry in the graph).
698
0
      if (level != 0 && best_cur_score < best_score) {
699
0
        const score_t last_pos_cost =
700
0
            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
701
0
        const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
702
0
        score = best_cur_score + last_pos_score;
703
0
        if (score < best_score) {
704
0
          best_score = score;
705
0
          best_path[0] = n;          // best eob position
706
0
          best_path[1] = m;          // best node index
707
0
          best_path[2] = best_prev;  // best predecessor
708
0
        }
709
0
      }
710
0
    }
711
0
  }
712
713
  // Fresh start
714
  // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
715
0
  if (coeff_type == TYPE_I16_AC) {
716
0
    memset(in + 1, 0, 15 * sizeof(*in));
717
0
    memset(out + 1, 0, 15 * sizeof(*out));
718
0
  } else {
719
0
    memset(in, 0, 16 * sizeof(*in));
720
0
    memset(out, 0, 16 * sizeof(*out));
721
0
  }
722
0
  if (best_path[0] == -1) {
723
0
    return 0;  // skip!
724
0
  }
725
726
0
  {
727
    // Unwind the best path.
728
    // Note: best-prev on terminal node is not necessarily equal to the
729
    // best_prev for non-terminal. So we patch best_path[2] in.
730
0
    int nz = 0;
731
0
    int best_node = best_path[1];
732
0
    n = best_path[0];
733
0
    NODE(n, best_node).prev = best_path[2];  // force best-prev for terminal
734
735
0
    for (; n >= first; --n) {
736
0
      const Node* const node = &NODE(n, best_node);
737
0
      const int j = kZigzag[n];
738
0
      out[n] = node->sign ? -node->level : node->level;
739
0
      nz |= node->level;
740
0
      in[j] = out[n] * mtx->q[j];
741
0
      best_node = node->prev;
742
0
    }
743
0
    return (nz != 0);
744
0
  }
745
0
}
746
747
#undef NODE
748
749
//------------------------------------------------------------------------------
750
// Performs: difference, transform, quantize, back-transform, add
751
// all at once. Output is the reconstructed block in *yuv_out, and the
752
// quantized levels in *levels.
753
754
static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
755
                              VP8ModeScore* WEBP_RESTRICT const rd,
756
19.0M
                              uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
757
19.0M
  const VP8Encoder* const enc = it->enc;
758
19.0M
  const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
759
19.0M
  const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
760
19.0M
  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
761
19.0M
  int nz = 0;
762
19.0M
  int n;
763
19.0M
  int16_t tmp[16][16], dc_tmp[16];
764
765
171M
  for (n = 0; n < 16; n += 2) {
766
152M
    VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
767
152M
  }
768
19.0M
  VP8FTransformWHT(tmp[0], dc_tmp);
769
19.0M
  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2) << 24;
770
771
19.0M
  if (DO_TRELLIS_I16 && it->do_trellis) {
772
0
    int x, y;
773
0
    VP8IteratorNzToBytes(it);
774
0
    for (y = 0, n = 0; y < 4; ++y) {
775
0
      for (x = 0; x < 4; ++x, ++n) {
776
0
        const int ctx = it->top_nz[x] + it->left_nz[y];
777
0
        const int non_zero = TrellisQuantizeBlock(
778
0
            enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1,
779
0
            dqm->lambda_trellis_i16);
780
0
        it->top_nz[x] = it->left_nz[y] = non_zero;
781
0
        rd->y_ac_levels[n][0] = 0;
782
0
        nz |= non_zero << n;
783
0
      }
784
0
    }
785
19.0M
  } else {
786
171M
    for (n = 0; n < 16; n += 2) {
787
      // Zero-out the first coeff, so that: a) nz is correct below, and
788
      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
789
152M
      tmp[n][0] = tmp[n + 1][0] = 0;
790
152M
      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1) << n;
791
152M
      assert(rd->y_ac_levels[n + 0][0] == 0);
792
152M
      assert(rd->y_ac_levels[n + 1][0] == 0);
793
152M
    }
794
19.0M
  }
795
796
  // Transform back
797
19.0M
  VP8TransformWHT(dc_tmp, tmp[0]);
798
171M
  for (n = 0; n < 16; n += 2) {
799
152M
    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
800
152M
  }
801
802
19.0M
  return nz;
803
19.0M
}
804
805
static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
806
                             int16_t levels[16],
807
                             const uint8_t* WEBP_RESTRICT const src,
808
486M
                             uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
809
486M
  const VP8Encoder* const enc = it->enc;
810
486M
  const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
811
486M
  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
812
486M
  int nz = 0;
813
486M
  int16_t tmp[16];
814
815
486M
  VP8FTransform(src, ref, tmp);
816
486M
  if (DO_TRELLIS_I4 && it->do_trellis) {
817
0
    const int x = it->i4 & 3, y = it->i4 >> 2;
818
0
    const int ctx = it->top_nz[x] + it->left_nz[y];
819
0
    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1,
820
0
                              dqm->lambda_trellis_i4);
821
486M
  } else {
822
486M
    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1);
823
486M
  }
824
486M
  VP8ITransform(ref, tmp, yuv_out, 0);
825
486M
  return nz;
826
486M
}
827
828
//------------------------------------------------------------------------------
829
// DC-error diffusion
830
831
// Diffusion weights. We under-correct a bit (15/16th of the error is actually
832
// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
833
152M
#define C1 7  // fraction of error sent to the 4x4 block below
834
152M
#define C2 8  // fraction of error sent to the 4x4 block on the right
835
152M
#define DSHIFT 4
836
304M
#define DSCALE 1  // storage descaling, needed to make the error fit int8_t
837
838
// Quantize as usual, but also compute and return the quantization error.
839
// Error is already divided by DSHIFT.
840
static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
841
152M
                          const VP8Matrix* WEBP_RESTRICT const mtx) {
842
152M
  int V = *v;
843
152M
  const int sign = (V < 0);
844
152M
  if (sign) V = -V;
845
152M
  if (V > (int)mtx->zthresh[0]) {
846
57.5M
    const int qV = QUANTDIV(V, mtx->iq[0], mtx->bias[0]) * mtx->q[0];
847
57.5M
    const int err = (V - qV);
848
57.5M
    *v = sign ? -qV : qV;
849
57.5M
    return (sign ? -err : err) >> DSCALE;
850
57.5M
  }
851
94.7M
  *v = 0;
852
94.7M
  return (sign ? -V : V) >> DSCALE;
853
152M
}
854
855
static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
856
                            const VP8Matrix* WEBP_RESTRICT const mtx,
857
                            int16_t tmp[][16],
858
19.0M
                            VP8ModeScore* WEBP_RESTRICT const rd) {
859
  //         | top[0] | top[1]
860
  // --------+--------+---------
861
  // left[0] | tmp[0]   tmp[1]  <->   err0 err1
862
  // left[1] | tmp[2]   tmp[3]        err2 err3
863
  //
864
  // Final errors {err1,err2,err3} are preserved and later restored
865
  // as top[]/left[] on the next block.
866
19.0M
  int ch;
867
57.1M
  for (ch = 0; ch <= 1; ++ch) {
868
38.0M
    const int8_t* const top = it->top_derr[it->x][ch];
869
38.0M
    const int8_t* const left = it->left_derr[ch];
870
38.0M
    int16_t(*const c)[16] = &tmp[ch * 4];
871
38.0M
    int err0, err1, err2, err3;
872
38.0M
    c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
873
38.0M
    err0 = QuantizeSingle(&c[0][0], mtx);
874
38.0M
    c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE);
875
38.0M
    err1 = QuantizeSingle(&c[1][0], mtx);
876
38.0M
    c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE);
877
38.0M
    err2 = QuantizeSingle(&c[2][0], mtx);
878
38.0M
    c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
879
38.0M
    err3 = QuantizeSingle(&c[3][0], mtx);
880
    // error 'err' is bounded by mtx->q[0] which is 132 at max. Hence
881
    // err >> DSCALE will fit in an int8_t type if DSCALE>=1.
882
38.0M
    assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
883
38.0M
    rd->derr[ch][0] = (int8_t)err1;
884
38.0M
    rd->derr[ch][1] = (int8_t)err2;
885
38.0M
    rd->derr[ch][2] = (int8_t)err3;
886
38.0M
  }
887
19.0M
}
888
889
static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
890
4.75M
                                 const VP8ModeScore* WEBP_RESTRICT const rd) {
891
4.75M
  int ch;
892
14.2M
  for (ch = 0; ch <= 1; ++ch) {
893
9.51M
    int8_t* const top = it->top_derr[it->x][ch];
894
9.51M
    int8_t* const left = it->left_derr[ch];
895
9.51M
    left[0] = rd->derr[ch][0];           // restore err1
896
9.51M
    left[1] = 3 * rd->derr[ch][2] >> 2;  //     ... 3/4th of err3
897
9.51M
    top[0] = rd->derr[ch][1];            //     ... err2
898
9.51M
    top[1] = rd->derr[ch][2] - left[1];  //     ... 1/4th of err3.
899
9.51M
  }
900
4.75M
}
901
902
#undef C1
903
#undef C2
904
#undef DSHIFT
905
#undef DSCALE
906
907
//------------------------------------------------------------------------------
908
909
static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
910
                         VP8ModeScore* WEBP_RESTRICT const rd,
911
19.0M
                         uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
912
19.0M
  const VP8Encoder* const enc = it->enc;
913
19.0M
  const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
914
19.0M
  const uint8_t* const src = it->yuv_in + U_OFF_ENC;
915
19.0M
  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
916
19.0M
  int nz = 0;
917
19.0M
  int n;
918
19.0M
  int16_t tmp[8][16];
919
920
95.1M
  for (n = 0; n < 8; n += 2) {
921
76.1M
    VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
922
76.1M
  }
923
19.0M
  if (it->top_derr != NULL) CorrectDCValues(it, &dqm->uv, tmp, rd);
924
925
19.0M
  if (DO_TRELLIS_UV && it->do_trellis) {
926
0
    int ch, x, y;
927
0
    for (ch = 0, n = 0; ch <= 2; ch += 2) {
928
0
      for (y = 0; y < 2; ++y) {
929
0
        for (x = 0; x < 2; ++x, ++n) {
930
0
          const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
931
0
          const int non_zero = TrellisQuantizeBlock(
932
0
              enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv,
933
0
              dqm->lambda_trellis_uv);
934
0
          it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] = non_zero;
935
0
          nz |= non_zero << n;
936
0
        }
937
0
      }
938
0
    }
939
19.0M
  } else {
940
95.1M
    for (n = 0; n < 8; n += 2) {
941
76.1M
      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv) << n;
942
76.1M
    }
943
19.0M
  }
944
945
95.1M
  for (n = 0; n < 8; n += 2) {
946
76.1M
    VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
947
76.1M
  }
948
19.0M
  return (nz << 16);
949
19.0M
}
950
951
//------------------------------------------------------------------------------
952
// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
953
// Pick the mode is lower RD-cost = Rate + lambda * Distortion.
954
955
207k
static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
956
  // We look at the first three AC coefficients to determine what is the average
957
  // delta between each sub-4x4 block.
958
207k
  const int v0 = abs(DCs[1]);
959
207k
  const int v1 = abs(DCs[2]);
960
207k
  const int v2 = abs(DCs[4]);
961
207k
  int max_v = (v1 > v0) ? v1 : v0;
962
207k
  max_v = (v2 > max_v) ? v2 : max_v;
963
207k
  if (max_v > dqm->max_edge) dqm->max_edge = max_v;
964
207k
}
965
966
7.03M
static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
967
7.03M
  VP8ModeScore* const tmp = *a;
968
7.03M
  *a = *b;
969
7.03M
  *b = tmp;
970
7.03M
}
971
972
98.9M
static void SwapPtr(uint8_t** a, uint8_t** b) {
973
98.9M
  uint8_t* const tmp = *a;
974
98.9M
  *a = *b;
975
98.9M
  *b = tmp;
976
98.9M
}
977
978
8.53M
static void SwapOut(VP8EncIterator* const it) {
979
8.53M
  SwapPtr(&it->yuv_out, &it->yuv_out2);
980
8.53M
}
981
982
static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
983
4.75M
                            VP8ModeScore* WEBP_RESTRICT rd) {
984
4.75M
  const int kNumBlocks = 16;
985
4.75M
  VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
986
4.75M
  const int lambda = dqm->lambda_i16;
987
4.75M
  const int tlambda = dqm->tlambda;
988
4.75M
  const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
989
4.75M
  VP8ModeScore rd_tmp;
990
4.75M
  VP8ModeScore* rd_cur = &rd_tmp;
991
4.75M
  VP8ModeScore* rd_best = rd;
992
4.75M
  int mode;
993
4.75M
  int is_flat = IsFlatSource16(it->yuv_in + Y_OFF_ENC);
994
995
4.75M
  rd->mode_i16 = -1;
996
23.7M
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
997
19.0M
    uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC;  // scratch buffer
998
19.0M
    rd_cur->mode_i16 = mode;
999
1000
    // Reconstruct
1001
19.0M
    rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
1002
1003
    // Measure RD-score
1004
19.0M
    rd_cur->D = VP8SSE16x16(src, tmp_dst);
1005
19.0M
    rd_cur->SD =
1006
19.0M
        tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
1007
19.0M
    rd_cur->H = VP8FixedCostsI16[mode];
1008
19.0M
    rd_cur->R = VP8GetCostLuma16(it, rd_cur);
1009
19.0M
    if (is_flat) {
1010
      // refine the first impression (which was in pixel space)
1011
6.05M
      is_flat = IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16);
1012
6.05M
      if (is_flat) {
1013
        // Block is very flat. We put emphasis on the distortion being very low!
1014
6.05M
        rd_cur->D *= 2;
1015
6.05M
        rd_cur->SD *= 2;
1016
6.05M
      }
1017
6.05M
    }
1018
1019
    // Since we always examine Intra16 first, we can overwrite *rd directly.
1020
19.0M
    SetRDScore(lambda, rd_cur);
1021
19.0M
    if (mode == 0 || rd_cur->score < rd_best->score) {
1022
7.03M
      SwapModeScore(&rd_cur, &rd_best);
1023
7.03M
      SwapOut(it);
1024
7.03M
    }
1025
19.0M
  }
1026
4.75M
  if (rd_best != rd) {
1027
3.21M
    memcpy(rd, rd_best, sizeof(*rd));
1028
3.21M
  }
1029
4.75M
  SetRDScore(dqm->lambda_mode, rd);  // finalize score for mode decision.
1030
4.75M
  VP8SetIntra16Mode(it, rd->mode_i16);
1031
1032
  // we have a blocky macroblock (only DCs are non-zero) with fairly high
1033
  // distortion, record max delta so we can later adjust the minimal filtering
1034
  // strength needed to smooth these blocks out.
1035
4.75M
  if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto) {
1036
207k
    StoreMaxDelta(dqm, rd->y_dc_levels);
1037
207k
  }
1038
4.75M
}
1039
1040
//------------------------------------------------------------------------------
1041
1042
// return the cost array corresponding to the surrounding prediction modes.
1043
static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
1044
48.6M
                                     const uint8_t modes[16]) {
1045
48.6M
  const int preds_w = it->enc->preds_w;
1046
48.6M
  const int x = (it->i4 & 3), y = it->i4 >> 2;
1047
48.6M
  const int left = (x == 0) ? it->preds[y * preds_w - 1] : modes[it->i4 - 1];
1048
48.6M
  const int top = (y == 0) ? it->preds[-preds_w + x] : modes[it->i4 - 4];
1049
48.6M
  return VP8FixedCostsI4[top][left];
1050
48.6M
}
1051
1052
static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
1053
4.75M
                          VP8ModeScore* WEBP_RESTRICT const rd) {
1054
4.75M
  const VP8Encoder* const enc = it->enc;
1055
4.75M
  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
1056
4.75M
  const int lambda = dqm->lambda_i4;
1057
4.75M
  const int tlambda = dqm->tlambda;
1058
4.75M
  const uint8_t* const src0 = it->yuv_in + Y_OFF_ENC;
1059
4.75M
  uint8_t* const best_blocks = it->yuv_out2 + Y_OFF_ENC;
1060
4.75M
  int total_header_bits = 0;
1061
4.75M
  VP8ModeScore rd_best;
1062
1063
4.75M
  if (enc->max_i4_header_bits == 0) {
1064
0
    return 0;
1065
0
  }
1066
1067
4.75M
  InitScore(&rd_best);
1068
4.75M
  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
1069
4.75M
  SetRDScore(dqm->lambda_mode, &rd_best);
1070
4.75M
  VP8IteratorStartI4(it);
1071
48.6M
  do {
1072
48.6M
    const int kNumBlocks = 1;
1073
48.6M
    VP8ModeScore rd_i4;
1074
48.6M
    int mode;
1075
48.6M
    int best_mode = -1;
1076
48.6M
    const uint8_t* const src = src0 + VP8Scan[it->i4];
1077
48.6M
    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
1078
48.6M
    uint8_t* best_block = best_blocks + VP8Scan[it->i4];
1079
48.6M
    uint8_t* tmp_dst = it->yuv_p + I4TMP;  // scratch buffer.
1080
1081
48.6M
    InitScore(&rd_i4);
1082
48.6M
    MakeIntra4Preds(it);
1083
535M
    for (mode = 0; mode < NUM_BMODES; ++mode) {
1084
486M
      VP8ModeScore rd_tmp;
1085
486M
      int16_t tmp_levels[16];
1086
1087
      // Reconstruct
1088
486M
      rd_tmp.nz = ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode)
1089
486M
                  << it->i4;
1090
1091
      // Compute RD-score
1092
486M
      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
1093
486M
      rd_tmp.SD =
1094
486M
          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) : 0;
1095
486M
      rd_tmp.H = mode_costs[mode];
1096
1097
      // Add flatness penalty, to avoid flat area to be mispredicted
1098
      // by a complex mode.
1099
486M
      if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
1100
295M
        rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
1101
295M
      } else {
1102
191M
        rd_tmp.R = 0;
1103
191M
      }
1104
1105
      // early-out check
1106
486M
      SetRDScore(lambda, &rd_tmp);
1107
486M
      if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
1108
1109
      // finish computing score
1110
225M
      rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
1111
225M
      SetRDScore(lambda, &rd_tmp);
1112
1113
225M
      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
1114
84.2M
        CopyScore(&rd_i4, &rd_tmp);
1115
84.2M
        best_mode = mode;
1116
84.2M
        SwapPtr(&tmp_dst, &best_block);
1117
84.2M
        memcpy(rd_best.y_ac_levels[it->i4], tmp_levels,
1118
84.2M
               sizeof(rd_best.y_ac_levels[it->i4]));
1119
84.2M
      }
1120
225M
    }
1121
48.6M
    SetRDScore(dqm->lambda_mode, &rd_i4);
1122
48.6M
    AddScore(&rd_best, &rd_i4);
1123
48.6M
    if (rd_best.score >= rd->score) {
1124
3.26M
      return 0;
1125
3.26M
    }
1126
45.3M
    total_header_bits += (int)rd_i4.H;  // <- equal to mode_costs[best_mode];
1127
45.3M
    if (total_header_bits > enc->max_i4_header_bits) {
1128
0
      return 0;
1129
0
    }
1130
    // Copy selected samples if not in the right place already.
1131
45.3M
    if (best_block != best_blocks + VP8Scan[it->i4]) {
1132
21.5M
      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4]);
1133
21.5M
    }
1134
45.3M
    rd->modes_i4[it->i4] = best_mode;
1135
45.3M
    it->top_nz[it->i4 & 3] = it->left_nz[it->i4 >> 2] = (rd_i4.nz ? 1 : 0);
1136
45.3M
  } while (VP8IteratorRotateI4(it, best_blocks));
1137
1138
  // finalize state
1139
1.49M
  CopyScore(rd, &rd_best);
1140
1.49M
  VP8SetIntra4Mode(it, rd->modes_i4);
1141
1.49M
  SwapOut(it);
1142
1.49M
  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
1143
1.49M
  return 1;  // select intra4x4 over intra16x16
1144
4.75M
}
1145
1146
//------------------------------------------------------------------------------
1147
1148
static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
1149
4.75M
                       VP8ModeScore* WEBP_RESTRICT const rd) {
1150
4.75M
  const int kNumBlocks = 8;
1151
4.75M
  const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
1152
4.75M
  const int lambda = dqm->lambda_uv;
1153
4.75M
  const uint8_t* const src = it->yuv_in + U_OFF_ENC;
1154
4.75M
  uint8_t* tmp_dst = it->yuv_out2 + U_OFF_ENC;  // scratch buffer
1155
4.75M
  uint8_t* dst0 = it->yuv_out + U_OFF_ENC;
1156
4.75M
  uint8_t* dst = dst0;
1157
4.75M
  VP8ModeScore rd_best;
1158
4.75M
  int mode;
1159
1160
4.75M
  rd->mode_uv = -1;
1161
4.75M
  InitScore(&rd_best);
1162
23.7M
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1163
19.0M
    VP8ModeScore rd_uv;
1164
1165
    // Reconstruct
1166
19.0M
    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
1167
1168
    // Compute RD-score
1169
19.0M
    rd_uv.D = VP8SSE16x8(src, tmp_dst);
1170
19.0M
    rd_uv.SD = 0;  // not calling TDisto here: it tends to flatten areas.
1171
19.0M
    rd_uv.H = VP8FixedCostsUV[mode];
1172
19.0M
    rd_uv.R = VP8GetCostUV(it, &rd_uv);
1173
19.0M
    if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1174
10.4M
      rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1175
10.4M
    }
1176
1177
19.0M
    SetRDScore(lambda, &rd_uv);
1178
19.0M
    if (mode == 0 || rd_uv.score < rd_best.score) {
1179
6.16M
      CopyScore(&rd_best, &rd_uv);
1180
6.16M
      rd->mode_uv = mode;
1181
6.16M
      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
1182
6.16M
      if (it->top_derr != NULL) {
1183
6.16M
        memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
1184
6.16M
      }
1185
6.16M
      SwapPtr(&dst, &tmp_dst);
1186
6.16M
    }
1187
19.0M
  }
1188
4.75M
  VP8SetIntraUVMode(it, rd->mode_uv);
1189
4.75M
  AddScore(rd, &rd_best);
1190
4.75M
  if (dst != dst0) {  // copy 16x8 block if needed
1191
3.99M
    VP8Copy16x8(dst, dst0);
1192
3.99M
  }
1193
4.75M
  if (it->top_derr != NULL) {  // store diffusion errors for next block
1194
4.75M
    StoreDiffusionErrors(it, rd);
1195
4.75M
  }
1196
4.75M
}
1197
1198
//------------------------------------------------------------------------------
1199
// Final reconstruction and quantization.
1200
1201
static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
1202
0
                           VP8ModeScore* WEBP_RESTRICT const rd) {
1203
0
  const VP8Encoder* const enc = it->enc;
1204
0
  const int is_i16 = (it->mb->type == 1);
1205
0
  int nz = 0;
1206
1207
0
  if (is_i16) {
1208
0
    nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
1209
0
  } else {
1210
0
    VP8IteratorStartI4(it);
1211
0
    do {
1212
0
      const int mode = it->preds[(it->i4 & 3) + (it->i4 >> 2) * enc->preds_w];
1213
0
      const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
1214
0
      uint8_t* const dst = it->yuv_out + Y_OFF_ENC + VP8Scan[it->i4];
1215
0
      MakeIntra4Preds(it);
1216
0
      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4], src, dst, mode)
1217
0
            << it->i4;
1218
0
    } while (VP8IteratorRotateI4(it, it->yuv_out + Y_OFF_ENC));
1219
0
  }
1220
1221
0
  nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);
1222
0
  rd->nz = nz;
1223
0
}
1224
1225
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
1226
static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
1227
                                  int try_both_modes, int refine_uv_mode,
1228
0
                                  VP8ModeScore* WEBP_RESTRICT const rd) {
1229
0
  score_t best_score = MAX_COST;
1230
0
  int nz = 0;
1231
0
  int mode;
1232
0
  int is_i16 = try_both_modes || (it->mb->type == 1);
1233
1234
0
  const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
1235
  // Some empiric constants, of approximate order of magnitude.
1236
0
  const int lambda_d_i16 = 106;
1237
0
  const int lambda_d_i4 = 11;
1238
0
  const int lambda_d_uv = 120;
1239
0
  score_t score_i4 = dqm->i4_penalty;
1240
0
  score_t i4_bit_sum = 0;
1241
0
  const score_t bit_limit = try_both_modes ? it->enc->mb_header_limit
1242
0
                                           : MAX_COST;  // no early-out allowed
1243
1244
0
  if (is_i16) {  // First, evaluate Intra16 distortion
1245
0
    int best_mode = -1;
1246
0
    const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
1247
0
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1248
0
      const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
1249
0
      const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT +
1250
0
                            VP8FixedCostsI16[mode] * lambda_d_i16;
1251
0
      if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
1252
0
        continue;
1253
0
      }
1254
1255
0
      if (score < best_score) {
1256
0
        best_mode = mode;
1257
0
        best_score = score;
1258
0
      }
1259
0
    }
1260
0
    if (it->x == 0 || it->y == 0) {
1261
      // avoid starting a checkerboard resonance from the border. See bug #432.
1262
0
      if (IsFlatSource16(src)) {
1263
0
        best_mode = (it->x == 0) ? 0 : 2;
1264
0
        try_both_modes = 0;  // stick to i16
1265
0
      }
1266
0
    }
1267
0
    VP8SetIntra16Mode(it, best_mode);
1268
    // we'll reconstruct later, if i16 mode actually gets selected
1269
0
  }
1270
1271
  // Next, evaluate Intra4
1272
0
  if (try_both_modes || !is_i16) {
1273
    // We don't evaluate the rate here, but just account for it through a
1274
    // constant penalty (i4 mode usually needs more bits compared to i16).
1275
0
    is_i16 = 0;
1276
0
    VP8IteratorStartI4(it);
1277
0
    do {
1278
0
      int best_i4_mode = -1;
1279
0
      score_t best_i4_score = MAX_COST;
1280
0
      const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
1281
0
      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
1282
1283
0
      MakeIntra4Preds(it);
1284
0
      for (mode = 0; mode < NUM_BMODES; ++mode) {
1285
0
        const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
1286
0
        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT +
1287
0
                              mode_costs[mode] * lambda_d_i4;
1288
0
        if (score < best_i4_score) {
1289
0
          best_i4_mode = mode;
1290
0
          best_i4_score = score;
1291
0
        }
1292
0
      }
1293
0
      i4_bit_sum += mode_costs[best_i4_mode];
1294
0
      rd->modes_i4[it->i4] = best_i4_mode;
1295
0
      score_i4 += best_i4_score;
1296
0
      if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
1297
        // Intra4 won't be better than Intra16. Bail out and pick Intra16.
1298
0
        is_i16 = 1;
1299
0
        break;
1300
0
      } else {  // reconstruct partial block inside yuv_out2 buffer
1301
0
        uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC + VP8Scan[it->i4];
1302
0
        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4], src, tmp_dst,
1303
0
                                best_i4_mode)
1304
0
              << it->i4;
1305
0
      }
1306
0
    } while (VP8IteratorRotateI4(it, it->yuv_out2 + Y_OFF_ENC));
1307
0
  }
1308
1309
  // Final reconstruction, depending on which mode is selected.
1310
0
  if (!is_i16) {
1311
0
    VP8SetIntra4Mode(it, rd->modes_i4);
1312
0
    SwapOut(it);
1313
0
    best_score = score_i4;
1314
0
  } else {
1315
0
    nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
1316
0
  }
1317
1318
  // ... and UV!
1319
0
  if (refine_uv_mode) {
1320
0
    int best_mode = -1;
1321
0
    score_t best_uv_score = MAX_COST;
1322
0
    const uint8_t* const src = it->yuv_in + U_OFF_ENC;
1323
0
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1324
0
      const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
1325
0
      const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT +
1326
0
                            VP8FixedCostsUV[mode] * lambda_d_uv;
1327
0
      if (score < best_uv_score) {
1328
0
        best_mode = mode;
1329
0
        best_uv_score = score;
1330
0
      }
1331
0
    }
1332
0
    VP8SetIntraUVMode(it, best_mode);
1333
0
  }
1334
0
  nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);
1335
1336
0
  rd->nz = nz;
1337
0
  rd->score = best_score;
1338
0
}
1339
1340
//------------------------------------------------------------------------------
1341
// Entry point
1342
1343
int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
1344
4.75M
                VP8ModeScore* WEBP_RESTRICT const rd, VP8RDLevel rd_opt) {
1345
4.75M
  int is_skipped;
1346
4.75M
  const int method = it->enc->method;
1347
1348
4.75M
  InitScore(rd);
1349
1350
  // We can perform predictions for Luma16x16 and Chroma8x8 already.
1351
  // Luma4x4 predictions needs to be done as-we-go.
1352
4.75M
  VP8MakeLuma16Preds(it);
1353
4.75M
  VP8MakeChroma8Preds(it);
1354
1355
4.75M
  if (rd_opt > RD_OPT_NONE) {
1356
4.75M
    it->do_trellis = (rd_opt >= RD_OPT_TRELLIS_ALL);
1357
4.75M
    PickBestIntra16(it, rd);
1358
4.75M
    if (method >= 2) {
1359
4.75M
      PickBestIntra4(it, rd);
1360
4.75M
    }
1361
4.75M
    PickBestUV(it, rd);
1362
4.75M
    if (rd_opt == RD_OPT_TRELLIS) {  // finish off with trellis-optim now
1363
0
      it->do_trellis = 1;
1364
0
      SimpleQuantize(it, rd);
1365
0
    }
1366
4.75M
  } else {
1367
    // At this point we have heuristically decided intra16 / intra4.
1368
    // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
1369
    // For method <= 1, we don't re-examine the decision but just go ahead with
1370
    // quantization/reconstruction.
1371
0
    RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
1372
0
  }
1373
4.75M
  is_skipped = (rd->nz == 0);
1374
4.75M
  VP8SetSkip(it, is_skipped);
1375
4.75M
  return is_skipped;
1376
4.75M
}