/src/libwebp/src/enc/analysis_enc.c

Source
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Macroblock analysis
//
// Author: Skal (pascal.massimino@gmail.com)

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "src/dec/common_dec.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"

// Upper bound on the number of k-means refinement passes performed by
// AssignSegments(); the loop there may stop earlier once centers barely move.
#define MAX_ITERS_K_MEANS 6

//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated block by the majority of its
// neighbours.

// Replaces the segment id of every interior macroblock (i.e. not on the first
// or last row/column) by the id shared by at least 5 of its 8 neighbours, if
// such an id exists. Otherwise the macroblock keeps its own segment.
// Results are staged in a scratch buffer so that every vote is taken against
// the un-smoothed map. If the scratch allocation fails, the map is left as is.
static void SmoothSegmentMap(VP8Encoder* const enc) {
  const int width = enc->mb_w;
  const int height = enc->mb_h;
  const int votes_needed = 5;  // strict majority of the 8 neighbours + center
  uint8_t* const smoothed =
      (uint8_t*)WebPSafeMalloc(width * height, sizeof(*smoothed));
  int row, col;
  assert((uint64_t)(width * height) ==
         (uint64_t)width * height);  // no overflow, as per spec

  if (smoothed == NULL) return;

  // Pass 1: compute the smoothed id of each interior macroblock.
  for (row = 1; row < height - 1; ++row) {
    for (col = 1; col < width - 1; ++col) {
      const int pos = col + row * width;
      const VP8MBInfo* const center = enc->mb_info + pos;
      int votes[NUM_MB_SEGMENTS] = {0};
      int winner = center->segment;
      int dx, dy, seg;
      // Tally the segment ids of the 8 surrounding macroblocks.
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          if (dx == 0 && dy == 0) continue;  // the center doesn't vote
          ++votes[center[dy * width + dx].segment];
        }
      }
      // Pick the first id reaching the required count, if any.
      for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
        if (votes[seg] >= votes_needed) {
          winner = seg;
          break;
        }
      }
      smoothed[pos] = winner;
    }
  }

  // Pass 2: write the staged ids back into the macroblock info.
  for (row = 1; row < height - 1; ++row) {
    for (col = 1; col < width - 1; ++col) {
      const int pos = col + row * width;
      enc->mb_info[pos].segment = smoothed[pos];
    }
  }
  WebPSafeFree(smoothed);
}

//------------------------------------------------------------------------------
// set segment susceptibility 'alpha' / 'beta'

// Clamps 'v' to the closed interval [m, M]. The lower bound is tested first.
static WEBP_INLINE int clip(int v, int m, int M) {
  if (v < m) return m;
  if (v > M) return M;
  return v;
}

// Derives the per-segment 'alpha' and 'beta' values stored in enc->dqm[] from
// the segment centers:
//   alpha = 255 * (center - mid) / (max - min), clipped to [-127, 127]
//   beta  = 255 * (center - min) / (max - min), clipped to [0, 255]
// where min / max are taken over the first 'num_segments' centers.
static void SetSegmentAlphas(VP8Encoder* const enc,
                             const int centers[NUM_MB_SEGMENTS], int mid) {
  const int num_segments = enc->segment_hdr.num_segments;
  int lowest = centers[0];
  int highest = centers[0];
  int range;
  int s;

  // centers[0] already seeds both bounds: only the remaining ones are scanned.
  for (s = 1; s < num_segments; ++s) {
    const int c = centers[s];
    if (c < lowest) lowest = c;
    if (c > highest) highest = c;
  }
  if (highest == lowest) highest = lowest + 1;  // keep the divisor non-zero
  assert(mid <= highest && mid >= lowest);

  range = highest - lowest;
  for (s = 0; s < num_segments; ++s) {
    const int from_mid = 255 * (centers[s] - mid) / range;
    const int from_min = 255 * (centers[s] - lowest) / range;
    enc->dqm[s].alpha = clip(from_mid, -127, 127);
    enc->dqm[s].beta = clip(from_min, 0, 255);
  }
}

//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.

#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
// Starting value of 'best_alpha' in the per-mode searches below.
#define DEFAULT_ALPHA (-1)
// Non-zero when 'alpha' should replace 'best_alpha': strictly larger wins.
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))

// Flips 'alpha' around MAX_ALPHA (MAX_ALPHA - alpha) and clamps the result
// to the [0, MAX_ALPHA] range.
static int FinalAlphaValue(int alpha) {
  const int flipped = MAX_ALPHA - alpha;
  return clip(flipped, 0, MAX_ALPHA);
}

// Returns ALPHA_SCALE * last_non_zero / max_value for the given histogram, or
// 0 when max_value is not greater than 1.
// The result is not bounded here: it is clipped to [0..MAX_ALPHA] later on,
// which clamps the outer (mostly noisy) values and keeps the precision for
// the small values that contribute most.
static int GetAlpha(const VP8Histogram* const histo) {
  const int peak = histo->max_value;
  const int last = histo->last_non_zero;
  if (peak <= 1) return 0;
  return ALPHA_SCALE * last / peak;
}

// Puts 'histo' in its starting state before a collection pass:
// last_non_zero = 1 and max_value = 0 (for which GetAlpha() returns 0).
static void InitHistogram(VP8Histogram* const histo) {
  histo->last_non_zero = 1;
  histo->max_value = 0;
}

//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram

// Clusters the macroblock alpha values into (at most NUM_MB_SEGMENTS) segments
// using a 1-D k-means over the histogram 'alphas' (alphas[a] = number of
// macroblocks whose alpha is 'a').
// On return, each macroblock has its 'segment' set to its cluster index and
// its 'alpha' replaced by the cluster center. The segment map is optionally
// smoothed (bit 0 of config->preprocessing, only with more than one segment),
// and the per-segment alpha / beta are set through SetSegmentAlphas().
// NOTE(review): the code relies on 'alphas' having at least one non-zero bin
// (otherwise 'total_weight' below is zero) and on every mb->alpha having been
// counted in 'alphas' (otherwise 'map[alpha]' is read uninitialized). Both
// hold when the histogram was filled by MBAnalyze() for all macroblocks.
static void AssignSegments(VP8Encoder* const enc,
                           const int alphas[MAX_ALPHA + 1]) {
  // 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
  const int nb = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS)
                     ? enc->segment_hdr.num_segments
                     : NUM_MB_SEGMENTS;
  int centers[NUM_MB_SEGMENTS];
  int weighted_average = 0;
  int map[MAX_ALPHA + 1];  // alpha value -> index of its nearest center
  int a, n, k;
  int min_a = 0, max_a = MAX_ALPHA, range_a;
  // 'int' type is ok for histo, and won't overflow
  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];

  assert(nb >= 1);
  assert(nb <= NUM_MB_SEGMENTS);

  // bracket the input: [min_a, max_a] is the span of non-empty bins
  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {
  }
  min_a = n;
  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {
  }
  max_a = n;
  range_a = max_a - min_a;

  // Spread initial centers evenly: center k sits at the odd multiple
  // (2k + 1) / (2 * nb) of the range, i.e. in the middle of the k-th slice.
  for (k = 0, n = 1; k < nb; ++k, n += 2) {
    assert(n < 2 * nb);
    centers[k] = min_a + (n * range_a) / (2 * nb);
  }

  for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {  // few iters are enough
    int total_weight;
    int displaced;
    // Reset stats
    for (n = 0; n < nb; ++n) {
      accum[n] = 0;
      dist_accum[n] = 0;
    }
    // Assign nearest center for each 'a'
    // 'n' only ever moves forward as 'a' increases, so the resulting
    // assignment is non-decreasing in 'a'.
    n = 0;  // track the nearest center for current 'a'
    for (a = min_a; a <= max_a; ++a) {
      if (alphas[a]) {
        while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
          n++;
        }
        map[a] = n;
        // accumulate contribution into best centroid
        dist_accum[n] += a * alphas[a];
        accum[n] += alphas[a];
      }
    }
    // All point are classified. Move the centroids to the
    // center of their respective cloud.
    // Centers with no assigned point (accum[n] == 0) are left where they are.
    displaced = 0;
    weighted_average = 0;
    total_weight = 0;
    for (n = 0; n < nb; ++n) {
      if (accum[n]) {
        // rounded mean of the alpha values assigned to this center
        const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
        displaced += abs(centers[n] - new_center);
        centers[n] = new_center;
        weighted_average += new_center * accum[n];
        total_weight += accum[n];
      }
    }
    // rounded, population-weighted mean of the non-empty centers
    weighted_average = (weighted_average + total_weight / 2) / total_weight;
    if (displaced < 5) break;  // no need to keep on looping...
  }

  // Map each original value to the closest centroid
  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
    VP8MBInfo* const mb = &enc->mb_info[n];
    const int alpha = mb->alpha;
    mb->segment = map[alpha];
    mb->alpha = centers[map[alpha]];  // for the record.
  }

  if (nb > 1) {
    const int smooth = (enc->config->preprocessing & 1);
    if (smooth) SmoothSegmentMap(enc);
  }

  SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
}

//------------------------------------------------------------------------------
// Macroblock analysis: collect histogram for each mode, deduce the maximal
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.

// Number of modes to inspect for 'alpha' evaluation. We don't need to test all
// the possible modes during the analysis phase: we risk falling into a local
// optimum, or be subject to boundary effect.
// Modes are tried in index order, starting from 0.
#define MAX_INTRA16_MODE 2
// NOTE(review): MAX_INTRA4_MODE is not referenced by the code visible here.
#define MAX_INTRA4_MODE 2
#define MAX_UV_MODE 2

// Evaluates the first MAX_INTRA16_MODE intra16 prediction modes for the
// current macroblock: the mode with the largest alpha (first one wins on
// ties) is recorded with VP8SetIntra16Mode(), and that alpha is returned.
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  int top_alpha = DEFAULT_ALPHA;
  int top_mode = 0;
  int m;

  VP8MakeLuma16Preds(it);
  for (m = 0; m < MAX_INTRA16_MODE; ++m) {
    VP8Histogram stats;
    int cur_alpha;

    InitHistogram(&stats);
    VP8CollectHistogram(it->yuv_in + Y_OFF_ENC,
                        it->yuv_p + VP8I16ModeOffsets[m], 0, 16, &stats);
    cur_alpha = GetAlpha(&stats);
    if (!IS_BETTER_ALPHA(cur_alpha, top_alpha)) continue;
    top_alpha = cur_alpha;
    top_mode = m;
  }
  VP8SetIntra16Mode(it, top_mode);
  return top_alpha;
}

// Quick intra16 / intra4 decision based on the 16 values produced by
// VP8Mean16x4() over the luma samples: with S = sum of the values and
// S2 = sum of their squares, DC16 is selected when threshold * S2 < S * S,
// and DC4 (for all sixteen sub-blocks) otherwise.
// Always returns 0.
static int FastMBAnalyze(VP8EncIterator* const it) {
  // Empirical cut-off value, should be around 16 (~=block size). We use the
  // [8-17] range and favor intra4 at high quality, intra16 for low quality.
  const int quality = (int)it->enc->config->quality;
  const uint64_t threshold = 8 + (17 - 8) * quality / 100;
  uint32_t means[16];
  uint64_t sum = 0, sum_sq = 0;
  int i;

  // Each call fills four consecutive entries, from four rows of samples.
  for (i = 0; i < 16; i += 4) {
    VP8Mean16x4(it->yuv_in + Y_OFF_ENC + i * BPS, &means[i]);
  }
  for (i = 0; i < 16; ++i) {
    // The squared value is kept in 32 bits on purpose: the original analysis
    // of the VP8Mean16x4() output range shows it fits in a uint32_t.
    const uint32_t value = means[i];
    const uint32_t squared = value * value;
    sum += value;
    sum_sq += squared;
  }
  if (threshold * sum_sq >= sum * sum) {
    const uint8_t modes[16] = {0};  // DC4
    VP8SetIntra4Mode(it, modes);
  } else {
    VP8SetIntra16Mode(it, 0);  // DC16
  }
  return 0;
}

// Evaluates the first MAX_UV_MODE chroma prediction modes for the current
// macroblock. The mode recorded with VP8SetIntraUVMode() is the one with the
// *smallest* alpha (first one wins on ties), whereas the value returned is the
// *largest* alpha seen.
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
  int largest_alpha = DEFAULT_ALPHA;
  int lowest_alpha = 0;
  int chosen_mode = 0;
  int m;

  VP8MakeChroma8Preds(it);
  for (m = 0; m < MAX_UV_MODE; ++m) {
    VP8Histogram stats;
    int cur_alpha;

    InitHistogram(&stats);
    VP8CollectHistogram(it->yuv_in + U_OFF_ENC,
                        it->yuv_p + VP8UVModeOffsets[m], 16, 16 + 4 + 4,
                        &stats);
    cur_alpha = GetAlpha(&stats);
    if (IS_BETTER_ALPHA(cur_alpha, largest_alpha)) largest_alpha = cur_alpha;
    // The best prediction mode tends to be the one with the smallest alpha.
    // Mode 0 unconditionally seeds the running minimum.
    if (m == 0 || cur_alpha < lowest_alpha) {
      lowest_alpha = cur_alpha;
      chosen_mode = m;
    }
  }
  VP8SetIntraUVMode(it, chosen_mode);
  return largest_alpha;
}

// Analyzes the macroblock currently pointed to by 'it':
//  - resets it to intra16 / mode 0, not skipped, segment 0,
//  - picks a luma mode (fast path for method <= 1) and a chroma mode,
//  - mixes luma and chroma alphas as (3 * luma + chroma + 2) >> 2, converts
//    the result with FinalAlphaValue() and stores it in it->mb->alpha,
//  - counts that final value in the 'alphas' histogram,
//  - adds the final value to '*alpha' and the chroma alpha to '*uv_alpha'.
static void MBAnalyze(VP8EncIterator* const it, int alphas[MAX_ALPHA + 1],
                      int* const alpha, int* const uv_alpha) {
  int luma_alpha, chroma_alpha, mixed_alpha;

  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
  VP8SetSkip(it, 0);         // not skipped
  VP8SetSegment(it, 0);      // default segment, spec-wise.

  luma_alpha = (it->enc->method <= 1) ? FastMBAnalyze(it)
                                      : MBAnalyzeBestIntra16Mode(it);
  chroma_alpha = MBAnalyzeBestUVMode(it);

  // Final susceptibility mix
  mixed_alpha = FinalAlphaValue((3 * luma_alpha + chroma_alpha + 2) >> 2);
  alphas[mixed_alpha]++;
  it->mb->alpha = mixed_alpha;  // for later remapping.

  // Accumulate for later complexity analysis.
  *alpha += mixed_alpha;  // mixed susceptibility (not just luma)
  *uv_alpha += chroma_alpha;
}

// Resets one macroblock record to its defaults: I16x16 type, chroma mode 0,
// segment 0, alpha 0 and not skipped.
static void DefaultMBInfo(VP8MBInfo* const mb) {
  mb->alpha = 0;
  mb->segment = 0;  // default segment
  mb->skip = 0;     // not skipped
  mb->uv_mode = 0;
  mb->type = 1;  // I16x16
}

//------------------------------------------------------------------------------
// Main analysis loop:
// Collect all susceptibilities for each macroblock and record their
// distribution in alphas[]. Segments is assigned a-posteriori, based on
// this histogram.
// We also pick an intra16 prediction mode, which shouldn't be considered
// final except for fast-encode settings. We can also pick some intra4 modes
// and decide intra4/intra16, but that's usually almost always a bad choice at
// this stage.

// Used when no analysis is performed: every macroblock gets its default
// info, the susceptibilities of segment 0 and the global 'alpha' / 'uv_alpha'
// are zeroed, and the progress is advanced by 20.
static void ResetAllMBInfo(VP8Encoder* const enc) {
  const int num_mbs = enc->mb_w * enc->mb_h;
  VP8MBInfo* const infos = enc->mb_info;
  int i;

  for (i = 0; i < num_mbs; ++i) DefaultMBInfo(infos + i);

  // Note: we can't compute this 'alpha' / 'uv_alpha' -> set to default value.
  enc->alpha = 0;
  enc->uv_alpha = 0;
  // Default susceptibilities.
  enc->dqm[0].alpha = 0;
  enc->dqm[0].beta = 0;
  WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
}

// struct used to collect job result
// Per-job state for the analysis pass: one instance per worker, covering a
// contiguous range of macroblock rows (see InitSegmentJob()).
typedef struct {
  WebPWorker worker;          // runs DoSegmentsJob(this, &this->it)
  int alphas[MAX_ALPHA + 1];  // histogram of the final per-macroblock alphas
  int alpha, uv_alpha;        // running sums of the mixed / chroma alphas
  VP8EncIterator it;          // iterator over the macroblocks of this job
  int delta_progress;         // passed to VP8IteratorProgress(): 20 for the
                              // job starting at row 0, 0 for the others
} SegmentJob;

// main work call
// Worker hook: 'arg1' is the SegmentJob, 'arg2' its iterator.
// Analyzes macroblocks one after the other until the iterator is exhausted
// or VP8IteratorProgress() returns 0. Returns 1 if there was nothing to do,
// otherwise the value of the last VP8IteratorProgress() call.
static int DoSegmentsJob(void* arg1, void* arg2) {
  SegmentJob* const job = (SegmentJob*)arg1;
  VP8EncIterator* const it = (VP8EncIterator*)arg2;
  uint8_t tmp[32 + WEBP_ALIGN_CST];
  uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
  int status = 1;
  int keep_going = 1;

  if (VP8IteratorIsDone(it)) return status;

  while (keep_going) {
    // Let's pretend we have perfect lossless reconstruction.
    VP8IteratorImport(it, scratch);
    MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
    status = VP8IteratorProgress(it, job->delta_progress);
    // Only advance the iterator when the progress call succeeded.
    keep_going = status && VP8IteratorNext(it);
  }
  return status;
}

#ifdef WEBP_USE_THREAD
// Adds the results collected by 'src' (alpha histogram and both alpha sums)
// to the ones held by 'dst'. 'src' is left untouched.
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  const int* const from = src->alphas;
  int* const to = dst->alphas;
  int bin;

  dst->uv_alpha += src->uv_alpha;
  dst->alpha += src->alpha;
  for (bin = 0; bin < MAX_ALPHA + 1; ++bin) {
    to[bin] += from[bin];
  }
}
#endif

// initialize the job struct with some tasks to perform
// Prepares 'job' to analyze the macroblock rows [start_row, end_row):
// the worker is initialized and hooked to DoSegmentsJob(), the iterator is
// positioned on 'start_row' with a count-down covering all the macroblocks of
// the range, and the collected statistics are cleared.
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
                           int start_row, int end_row) {
  WebPWorker* const worker = &job->worker;
  VP8EncIterator* const it = &job->it;
  const int num_rows = end_row - start_row;

  WebPGetWorkerInterface()->Init(worker);
  worker->hook = DoSegmentsJob;
  worker->data1 = job;
  worker->data2 = it;

  VP8IteratorInit(enc, it);
  VP8IteratorSetRow(it, start_row);
  VP8IteratorSetCountDown(it, num_rows * enc->mb_w);

  job->alpha = 0;
  job->uv_alpha = 0;
  memset(job->alphas, 0, sizeof(job->alphas));

  // only one of both jobs can record the progress, since we don't
  // expect the user's hook to be multi-thread safe
  if (start_row == 0) {
    job->delta_progress = 20;
  } else {
    job->delta_progress = 0;
  }
}

// main entry point
// Runs the macroblock analysis pass over the whole picture.
// When analysis is needed (see 'do_segments'), every macroblock is analyzed
// (possibly split over two workers), enc->alpha / enc->uv_alpha receive the
// per-macroblock averages and segments are assigned with AssignSegments().
// Otherwise all macroblocks are reset to their defaults.
// Returns 1 on success. On failure, returns the result of
// WebPEncodingSetError() called with VP8_ENC_ERROR_OUT_OF_MEMORY.
int VP8EncAnalyze(VP8Encoder* const enc) {
  int ok = 1;
  const int do_segments =
      enc->config->emulate_jpeg_size ||  // We need the complexity evaluation.
      (enc->segment_hdr.num_segments > 1) ||
      (enc->method <= 1);  // for method 0 - 1, we need preds[] to be filled.
  if (do_segments) {
    const int last_row = enc->mb_h;
    const int total_mb = last_row * enc->mb_w;
#ifdef WEBP_USE_THREAD
    // We give a little more than a half work to the main thread.
    const int split_row = (9 * last_row + 15) >> 4;
    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
    const int do_mt = (enc->thread_level > 0) && (split_row >= kMinSplitRow);
#else
    const int do_mt = 0;
#endif
    const WebPWorkerInterface* const worker_interface =
        WebPGetWorkerInterface();
    SegmentJob main_job;
    if (do_mt) {
#ifdef WEBP_USE_THREAD
      SegmentJob side_job;
      // main_job handles rows [0, split_row), side_job the remaining ones.
      InitSegmentJob(enc, &main_job, 0, split_row);
      InitSegmentJob(enc, &side_job, split_row, last_row);
      // we don't need to call Reset() on main_job.worker, since we're calling
      // WebPWorkerExecute() on it
      ok &= worker_interface->Reset(&side_job.worker);
      // launch the two jobs in parallel
      if (ok) {
        worker_interface->Launch(&side_job.worker);
        worker_interface->Execute(&main_job.worker);
        // Note the use of '&' instead of '&&' because we must call the
        // functions no matter what.
        ok &= worker_interface->Sync(&side_job.worker);
        ok &= worker_interface->Sync(&main_job.worker);
      }
      worker_interface->End(&side_job.worker);
      if (ok) MergeJobs(&side_job, &main_job);  // merge results together
#endif                                          // WEBP_USE_THREAD
    } else {
      // Even for single-thread case, we use the generic Worker tools.
      InitSegmentJob(enc, &main_job, 0, last_row);
      worker_interface->Execute(&main_job.worker);
      ok &= worker_interface->Sync(&main_job.worker);
    }
    worker_interface->End(&main_job.worker);
    if (ok) {
      // main_job now holds the statistics of the whole picture.
      enc->alpha = main_job.alpha / total_mb;
      enc->uv_alpha = main_job.uv_alpha / total_mb;
      AssignSegments(enc, main_job.alphas);
    }
  } else {  // Use only one default segment.
    ResetAllMBInfo(enc);
  }
  if (!ok) {
    return WebPEncodingSetError(enc->pic,
                                VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
  }
  return ok;
}

Coverage Report

Created: 2026-03-31 06:56

Line	Count	Source
1		// Copyright 2011 Google Inc. All Rights Reserved.
2		//
3		// Use of this source code is governed by a BSD-style license
4		// that can be found in the COPYING file in the root of the source
5		// tree. An additional intellectual property rights grant can be found
6		// in the file PATENTS. All contributing project authors may
7		// be found in the AUTHORS file in the root of the source tree.
8		// -----------------------------------------------------------------------------
9		//
10		// Macroblock analysis
11		//
12		// Author: Skal (pascal.massimino@gmail.com)
13
14		#include <assert.h>
15		#include <stdlib.h>
16		#include <string.h>
17
18		#include "src/dec/common_dec.h"
19		#include "src/dsp/dsp.h"
20		#include "src/enc/vp8i_enc.h"
21		#include "src/utils/thread_utils.h"
22		#include "src/utils/utils.h"
23		#include "src/webp/encode.h"
24		#include "src/webp/types.h"
25
26	3.72k	#define MAX_ITERS_K_MEANS 6
27
28		//------------------------------------------------------------------------------
29		// Smooth the segment map by replacing isolated block by the majority of its
30		// neighbours.
31
32	0	static void SmoothSegmentMap(VP8Encoder* const enc) {
33	0	int n, x, y;
34	0	const int w = enc->mb_w;
35	0	const int h = enc->mb_h;
36	0	const int majority_cnt_3_x_3_grid = 5;
37	0	uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
38	0	assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec
39
40	0	if (tmp == NULL) return;
41	0	for (y = 1; y < h - 1; ++y) {
42	0	for (x = 1; x < w - 1; ++x) {
43	0	int cnt[NUM_MB_SEGMENTS] = {0};
44	0	const VP8MBInfo* const mb = &enc->mb_info[x + w * y];
45	0	int majority_seg = mb->segment;
46		// Check the 8 neighbouring segment values.
47	0	cnt[mb[-w - 1].segment]++; // top-left
48	0	cnt[mb[-w + 0].segment]++; // top
49	0	cnt[mb[-w + 1].segment]++; // top-right
50	0	cnt[mb[-1].segment]++; // left
51	0	cnt[mb[+1].segment]++; // right
52	0	cnt[mb[w - 1].segment]++; // bottom-left
53	0	cnt[mb[w + 0].segment]++; // bottom
54	0	cnt[mb[w + 1].segment]++; // bottom-right
55	0	for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
56	0	if (cnt[n] >= majority_cnt_3_x_3_grid) {
57	0	majority_seg = n;
58	0	break;
59	0	}
60	0	}
61	0	tmp[x + y * w] = majority_seg;
62	0	}
63	0	}
64	0	for (y = 1; y < h - 1; ++y) {
65	0	for (x = 1; x < w - 1; ++x) {
66	0	VP8MBInfo* const mb = &enc->mb_info[x + w * y];
67	0	mb->segment = tmp[x + y * w];
68	0	}
69	0	}
70	0	WebPSafeFree(tmp);
71	0	}
72
73		//------------------------------------------------------------------------------
74		// set segment susceptibility 'alpha' / 'beta'
75
76	3.31M	static WEBP_INLINE int clip(int v, int m, int M) {
77	3.31M	return (v < m) ? m : (v > M) ? M : v;
78	3.31M	}
79
80		static void SetSegmentAlphas(VP8Encoder* const enc,
81	1.72k	const int centers[NUM_MB_SEGMENTS], int mid) {
82	1.72k	const int nb = enc->segment_hdr.num_segments;
83	1.72k	int min = centers[0], max = centers[0];
84	1.72k	int n;
85
86	1.72k	if (nb > 1) {
87	8.61k	for (n = 0; n < nb; ++n) {
88	6.89k	if (min > centers[n]) min = centers[n];
89	6.89k	if (max < centers[n]) max = centers[n];
90	6.89k	}
91	1.72k	}
92	1.72k	if (max == min) max = min + 1;
93	1.72k	assert(mid <= max && mid >= min);
94	8.61k	for (n = 0; n < nb; ++n) {
95	6.89k	const int alpha = 255 * (centers[n] - mid) / (max - min);
96	6.89k	const int beta = 255 * (centers[n] - min) / (max - min);
97	6.89k	enc->dqm[n].alpha = clip(alpha, -127, 127);
98	6.89k	enc->dqm[n].beta = clip(beta, 0, 255);
99	6.89k	}
100	1.72k	}
101
102		//------------------------------------------------------------------------------
103		// Compute susceptibility based on DCT-coeff histograms:
104		// the higher, the "easier" the macroblock is to compress.
105
106	20.3M	#define MAX_ALPHA 255 // 8b of precision for susceptibilities.
107	13.2M	#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha.
108	6.60M	#define DEFAULT_ALPHA (-1)
109	13.2M	#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
110
111	3.30M	static int FinalAlphaValue(int alpha) {
112	3.30M	alpha = MAX_ALPHA - alpha;
113	3.30M	return clip(alpha, 0, MAX_ALPHA);
114	3.30M	}
115
116	13.2M	static int GetAlpha(const VP8Histogram* const histo) {
117		// 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
118		// values which happen to be mostly noise. This leaves the maximum precision
119		// for handling the useful small values which contribute most.
120	13.2M	const int max_value = histo->max_value;
121	13.2M	const int last_non_zero = histo->last_non_zero;
122	13.2M	const int alpha =
123	13.2M	(max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
124	13.2M	return alpha;
125	13.2M	}
126
127	13.2M	static void InitHistogram(VP8Histogram* const histo) {
128	13.2M	histo->max_value = 0;
129	13.2M	histo->last_non_zero = 1;
130	13.2M	}
131
132		//------------------------------------------------------------------------------
133		// Simplified k-Means, to assign Nb segments based on alpha-histogram
134
135		static void AssignSegments(VP8Encoder* const enc,
136	1.72k	const int alphas[MAX_ALPHA + 1]) {
137		// 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
138		// explicit check is needed to avoid spurious warning about 'n + 1' exceeding
139		// array bounds of 'centers' with some compilers (noticed with gcc-4.9).
140	1.72k	const int nb = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS)
141	1.72k	? enc->segment_hdr.num_segments
142	1.72k	: NUM_MB_SEGMENTS;
143	1.72k	int centers[NUM_MB_SEGMENTS];
144	1.72k	int weighted_average = 0;
145	1.72k	int map[MAX_ALPHA + 1];
146	1.72k	int a, n, k;
147	1.72k	int min_a = 0, max_a = MAX_ALPHA, range_a;
148		// 'int' type is ok for histo, and won't overflow
149	1.72k	int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
150
151	1.72k	assert(nb >= 1);
152	1.72k	assert(nb <= NUM_MB_SEGMENTS);
153
154		// bracket the input
155	273k	for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {
156	271k	}
157	1.72k	min_a = n;
158	40.3k	for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {
159	38.5k	}
160	1.72k	max_a = n;
161	1.72k	range_a = max_a - min_a;
162
163		// Spread initial centers evenly
164	8.61k	for (k = 0, n = 1; k < nb; ++k, n += 2) {
165	6.89k	assert(n < 2 * nb);
166	6.89k	centers[k] = min_a + (n * range_a) / (2 * nb);
167	6.89k	}
168
169	3.72k	for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough
170	3.68k	int total_weight;
171	3.68k	int displaced;
172		// Reset stats
173	18.4k	for (n = 0; n < nb; ++n) {
174	14.7k	accum[n] = 0;
175	14.7k	dist_accum[n] = 0;
176	14.7k	}
177		// Assign nearest center for each 'a'
178	3.68k	n = 0; // track the nearest center for current 'a'
179	370k	for (a = min_a; a <= max_a; ++a) {
180	366k	if (alphas[a]) {
181	157k	while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
182	10.1k	n++;
183	10.1k	}
184	146k	map[a] = n;
185		// accumulate contribution into best centroid
186	146k	dist_accum[n] += a * alphas[a];
187	146k	accum[n] += alphas[a];
188	146k	}
189	366k	}
190		// All point are classified. Move the centroids to the
191		// center of their respective cloud.
192	3.68k	displaced = 0;
193	3.68k	weighted_average = 0;
194	3.68k	total_weight = 0;
195	18.4k	for (n = 0; n < nb; ++n) {
196	14.7k	if (accum[n]) {
197	11.2k	const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
198	11.2k	displaced += abs(centers[n] - new_center);
199	11.2k	centers[n] = new_center;
200	11.2k	weighted_average += new_center * accum[n];
201	11.2k	total_weight += accum[n];
202	11.2k	}
203	14.7k	}
204	3.68k	weighted_average = (weighted_average + total_weight / 2) / total_weight;
205	3.68k	if (displaced < 5) break; // no need to keep on looping...
206	3.68k	}
207
208		// Map each original value to the closest centroid
209	3.30M	for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
210	3.30M	VP8MBInfo* const mb = &enc->mb_info[n];
211	3.30M	const int alpha = mb->alpha;
212	3.30M	mb->segment = map[alpha];
213	3.30M	mb->alpha = centers[map[alpha]]; // for the record.
214	3.30M	}
215
216	1.72k	if (nb > 1) {
217	1.72k	const int smooth = (enc->config->preprocessing & 1);
218	1.72k	if (smooth) SmoothSegmentMap(enc);
219	1.72k	}
220
221	1.72k	SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
222	1.72k	}
223
224		//------------------------------------------------------------------------------
225		// Macroblock analysis: collect histogram for each mode, deduce the maximal
226		// susceptibility and set best modes for this macroblock.
227		// Segment assignment is done later.
228
229		// Number of modes to inspect for 'alpha' evaluation. We don't need to test all
230		// the possible modes during the analysis phase: we risk falling into a local
231		// optimum, or be subject to boundary effect
232	3.30M	#define MAX_INTRA16_MODE 2
233		#define MAX_INTRA4_MODE 2
234	3.30M	#define MAX_UV_MODE 2
235
236	3.30M	static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
237	3.30M	const int max_mode = MAX_INTRA16_MODE;
238	3.30M	int mode;
239	3.30M	int best_alpha = DEFAULT_ALPHA;
240	3.30M	int best_mode = 0;
241
242	3.30M	VP8MakeLuma16Preds(it);
243	9.91M	for (mode = 0; mode < max_mode; ++mode) {
244	6.60M	VP8Histogram histo;
245	6.60M	int alpha;
246
247	6.60M	InitHistogram(&histo);
248	6.60M	VP8CollectHistogram(it->yuv_in + Y_OFF_ENC,
249	6.60M	it->yuv_p + VP8I16ModeOffsets[mode], 0, 16, &histo);
250	6.60M	alpha = GetAlpha(&histo);
251	6.60M	if (IS_BETTER_ALPHA(alpha, best_alpha)) {
252	3.74M	best_alpha = alpha;
253	3.74M	best_mode = mode;
254	3.74M	}
255	6.60M	}
256	3.30M	VP8SetIntra16Mode(it, best_mode);
257	3.30M	return best_alpha;
258	3.30M	}
259
260	0	static int FastMBAnalyze(VP8EncIterator* const it) {
261		// Empirical cut-off value, should be around 16 (~=block size). We use the
262		// [8-17] range and favor intra4 at high quality, intra16 for low quality.
263	0	const int q = (int)it->enc->config->quality;
264	0	const uint64_t kThreshold = 8 + (17 - 8) * q / 100;
265	0	int k;
266	0	uint32_t dc[16];
267	0	uint64_t m, m2;
268	0	for (k = 0; k < 16; k += 4) {
269	0	VP8Mean16x4(it->yuv_in + Y_OFF_ENC + k * BPS, &dc[k]);
270	0	}
271	0	for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
272		// dc[k] is at most 16 (for loop of 16)*(16*255) (max value in dc after
273		// Mean16x4, which uses two nested loops of 4). Squared as (16*16*255)^2, it
274		// fits in a uint32_t.
275	0	const uint32_t dc2 = dc[k] * dc[k];
276	0	m += dc[k];
277	0	m2 += dc2;
278	0	}
279	0	if (kThreshold * m2 < m * m) {
280	0	VP8SetIntra16Mode(it, 0); // DC16
281	0	} else {
282	0	const uint8_t modes[16] = {0}; // DC4
283	0	VP8SetIntra4Mode(it, modes);
284	0	}
285	0	return 0;
286	0	}
287
288	3.30M	static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
289	3.30M	int best_alpha = DEFAULT_ALPHA;
290	3.30M	int smallest_alpha = 0;
291	3.30M	int best_mode = 0;
292	3.30M	const int max_mode = MAX_UV_MODE;
293	3.30M	int mode;
294
295	3.30M	VP8MakeChroma8Preds(it);
296	9.91M	for (mode = 0; mode < max_mode; ++mode) {
297	6.60M	VP8Histogram histo;
298	6.60M	int alpha;
299	6.60M	InitHistogram(&histo);
300	6.60M	VP8CollectHistogram(it->yuv_in + U_OFF_ENC,
301	6.60M	it->yuv_p + VP8UVModeOffsets[mode], 16, 16 + 4 + 4,
302	6.60M	&histo);
303	6.60M	alpha = GetAlpha(&histo);
304	6.60M	if (IS_BETTER_ALPHA(alpha, best_alpha)) {
305	3.84M	best_alpha = alpha;
306	3.84M	}
307		// The best prediction mode tends to be the one with the smallest alpha.
308	6.60M	if (mode == 0 || alpha < smallest_alpha) {
309	3.95M	smallest_alpha = alpha;
310	3.95M	best_mode = mode;
311	3.95M	}
312	6.60M	}
313	3.30M	VP8SetIntraUVMode(it, best_mode);
314	3.30M	return best_alpha;
315	3.30M	}
316
317		static void MBAnalyze(VP8EncIterator* const it, int alphas[MAX_ALPHA + 1],
318	3.30M	int* const alpha, int* const uv_alpha) {
319	3.30M	const VP8Encoder* const enc = it->enc;
320	3.30M	int best_alpha, best_uv_alpha;
321
322	3.30M	VP8SetIntra16Mode(it, 0); // default: Intra16, DC_PRED
323	3.30M	VP8SetSkip(it, 0); // not skipped
324	3.30M	VP8SetSegment(it, 0); // default segment, spec-wise.
325
326	3.30M	if (enc->method <= 1) {
327	0	best_alpha = FastMBAnalyze(it);
328	3.30M	} else {
329	3.30M	best_alpha = MBAnalyzeBestIntra16Mode(it);
330	3.30M	}
331	3.30M	best_uv_alpha = MBAnalyzeBestUVMode(it);
332
333		// Final susceptibility mix
334	3.30M	best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
335	3.30M	best_alpha = FinalAlphaValue(best_alpha);
336	3.30M	alphas[best_alpha]++;
337	3.30M	it->mb->alpha = best_alpha; // for later remapping.
338
339		// Accumulate for later complexity analysis.
340	3.30M	*alpha += best_alpha; // mixed susceptibility (not just luma)
341	3.30M	*uv_alpha += best_uv_alpha;
342	3.30M	}
343
344	0	static void DefaultMBInfo(VP8MBInfo* const mb) {
345	0	mb->type = 1; // I16x16
346	0	mb->uv_mode = 0;
347	0	mb->skip = 0; // not skipped
348	0	mb->segment = 0; // default segment
349	0	mb->alpha = 0;
350	0	}
351
352		//------------------------------------------------------------------------------
353		// Main analysis loop:
354		// Collect all susceptibilities for each macroblock and record their
355		// distribution in alphas[]. Segments is assigned a-posteriori, based on
356		// this histogram.
357		// We also pick an intra16 prediction mode, which shouldn't be considered
358		// final except for fast-encode settings. We can also pick some intra4 modes
359		// and decide intra4/intra16, but that's usually almost always a bad choice at
360		// this stage.
361
362	0	static void ResetAllMBInfo(VP8Encoder* const enc) {
363	0	int n;
364	0	for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
365	0	DefaultMBInfo(&enc->mb_info[n]);
366	0	}
367		// Default susceptibilities.
368	0	enc->dqm[0].alpha = 0;
369	0	enc->dqm[0].beta = 0;
370		// Note: we can't compute this 'alpha' / 'uv_alpha' -> set to default value.
371	0	enc->alpha = 0;
372	0	enc->uv_alpha = 0;
373	0	WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
374	0	}
375
376		// struct used to collect job result
377		typedef struct {
378		WebPWorker worker;
379		int alphas[MAX_ALPHA + 1];
380		int alpha, uv_alpha;
381		VP8EncIterator it;
382		int delta_progress;
383		} SegmentJob;
384
385		// main work call
386	1.72k	static int DoSegmentsJob(void* arg1, void* arg2) {
387	1.72k	SegmentJob* const job = (SegmentJob*)arg1;
388	1.72k	VP8EncIterator* const it = (VP8EncIterator*)arg2;
389	1.72k	int ok = 1;
390	1.72k	if (!VP8IteratorIsDone(it)) {
391	1.72k	uint8_t tmp[32 + WEBP_ALIGN_CST];
392	1.72k	uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
393	3.30M	do {
394		// Let's pretend we have perfect lossless reconstruction.
395	3.30M	VP8IteratorImport(it, scratch);
396	3.30M	MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
397	3.30M	ok = VP8IteratorProgress(it, job->delta_progress);
398	3.30M	} while (ok && VP8IteratorNext(it));
399	1.72k	}
400	1.72k	return ok;
401	1.72k	}
402
403		#ifdef WEBP_USE_THREAD
404	0	static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
405	0	int i;
406	0	for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
407	0	dst->alpha += src->alpha;
408	0	dst->uv_alpha += src->uv_alpha;
409	0	}
410		#endif
411
412		// initialize the job struct with some tasks to perform
413		static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
414	1.72k	int start_row, int end_row) {
415	1.72k	WebPGetWorkerInterface()->Init(&job->worker);
416	1.72k	job->worker.data1 = job;
417	1.72k	job->worker.data2 = &job->it;
418	1.72k	job->worker.hook = DoSegmentsJob;
419	1.72k	VP8IteratorInit(enc, &job->it);
420	1.72k	VP8IteratorSetRow(&job->it, start_row);
421	1.72k	VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w);
422	1.72k	memset(job->alphas, 0, sizeof(job->alphas));
423	1.72k	job->alpha = 0;
424	1.72k	job->uv_alpha = 0;
425		// only one of both jobs can record the progress, since we don't
426		// expect the user's hook to be multi-thread safe
427	1.72k	job->delta_progress = (start_row == 0) ? 20 : 0;
428	1.72k	}
429
430		// main entry point
431	1.72k	int VP8EncAnalyze(VP8Encoder* const enc) {
432	1.72k	int ok = 1;
433	1.72k	const int do_segments =
434	1.72k	enc->config->emulate_jpeg_size || // We need the complexity evaluation.
435	1.72k	(enc->segment_hdr.num_segments > 1) ||
436	0	(enc->method <= 1); // for method 0 - 1, we need preds[] to be filled.
437	1.72k	if (do_segments) {
438	1.72k	const int last_row = enc->mb_h;
439	1.72k	const int total_mb = last_row * enc->mb_w;
440	1.72k	#ifdef WEBP_USE_THREAD
441		// We give a little more than a half work to the main thread.
442	1.72k	const int split_row = (9 * last_row + 15) >> 4;
443	1.72k	const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
444	1.72k	const int do_mt = (enc->thread_level > 0) && (split_row >= kMinSplitRow);
445		#else
446		const int do_mt = 0;
447		#endif
448	1.72k	const WebPWorkerInterface* const worker_interface =
449	1.72k	WebPGetWorkerInterface();
450	1.72k	SegmentJob main_job;
451	1.72k	if (do_mt) {
452	0	#ifdef WEBP_USE_THREAD
453	0	SegmentJob side_job;
454		// Note the use of '&' instead of '&&' because we must call the functions
455		// no matter what.
456	0	InitSegmentJob(enc, &main_job, 0, split_row);
457	0	InitSegmentJob(enc, &side_job, split_row, last_row);
458		// we don't need to call Reset() on main_job.worker, since we're calling
459		// WebPWorkerExecute() on it
460	0	ok &= worker_interface->Reset(&side_job.worker);
461		// launch the two jobs in parallel
462	0	if (ok) {
463	0	worker_interface->Launch(&side_job.worker);
464	0	worker_interface->Execute(&main_job.worker);
465	0	ok &= worker_interface->Sync(&side_job.worker);
466	0	ok &= worker_interface->Sync(&main_job.worker);
467	0	}
468	0	worker_interface->End(&side_job.worker);
469	0	if (ok) MergeJobs(&side_job, &main_job); // merge results together
470	0	#endif // WEBP_USE_THREAD
471	1.72k	} else {
472		// Even for single-thread case, we use the generic Worker tools.
473	1.72k	InitSegmentJob(enc, &main_job, 0, last_row);
474	1.72k	worker_interface->Execute(&main_job.worker);
475	1.72k	ok &= worker_interface->Sync(&main_job.worker);
476	1.72k	}
477	1.72k	worker_interface->End(&main_job.worker);
478	1.72k	if (ok) {
479	1.72k	enc->alpha = main_job.alpha / total_mb;
480	1.72k	enc->uv_alpha = main_job.uv_alpha / total_mb;
481	1.72k	AssignSegments(enc, main_job.alphas);
482	1.72k	}
483	1.72k	} else { // Use only one default segment.
484	0	ResetAllMBInfo(enc);
485	0	}
486	1.72k	if (!ok) {
487	0	return WebPEncodingSetError(enc->pic,
488	0	VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
489	0	}
490	1.72k	return ok;
491	1.72k	}