Coverage Report

Created: 2026-05-16 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/theora/lib/mcenc.c
Line
Count
Source
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9
 * by the Xiph.Org Foundation https://www.xiph.org/                 *
10
 *                                                                  *
11
 ********************************************************************
12
13
  function:
14
15
 ********************************************************************/
16
#include <stdlib.h>
17
#include <limits.h>
18
#include <string.h>
19
#include "encint.h"
20
21
22
23
typedef struct oc_mcenc_ctx           oc_mcenc_ctx;
24
25
26
27
/*Temporary state used for motion estimation.
  It is filled in by oc_mcenc_find_candidates_a() and, when set A fails to
   produce a good enough match, extended by oc_mcenc_find_candidates_b().*/
28
struct oc_mcenc_ctx{
29
  /*The candidate motion vectors, stored as (x,y) pairs.
    Slot 0 holds the median predictor; set A starts at slot 1.
    The search halves each component with OC_DIV2 before using it as a
     full-pel offset.*/
30
  int                candidates[13][2];
31
  /*The start of the Set B candidates.
    This is also one past the last set A candidate.*/
32
  int                setb0;
33
  /*The total number of candidates.
    This is only written by oc_mcenc_find_candidates_b().*/
34
  int                ncandidates;
35
};
36
37
38
39
/*The maximum Y plane SAD value for accepting the median predictor.*/
40
337k
#define OC_YSAD_THRESH1            (256)
41
/*The amount to right shift the minimum error by when inflating it for
42
   computing the second maximum Y plane SAD threshold.*/
43
306k
#define OC_YSAD_THRESH2_SCALE_BITS (4)
44
/*The amount to add to the second maximum Y plane threshold when inflating
45
   it.*/
46
306k
#define OC_YSAD_THRESH2_OFFSET     (64)
47
48
/*The vector offsets in the X direction for each search site in the square
49
   pattern.*/
50
static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
51
/*The vector offsets in the Y direction for each search site in the square
52
   pattern.*/
53
static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
54
/*The number of sites to search for each boundary condition in the square
55
   pattern.
56
  Bit flags for the boundary conditions are as follows:
57
  1: -16==dx
58
  2:      dx==15(.5)
59
  4: -16==dy
60
  8:      dy==15(.5)*/
61
static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
62
/*The list of sites to search for each boundary condition in the square
63
   pattern.*/
64
static const int OC_SQUARE_SITES[11][8]={
65
  /* -15.5<dx<15(.5),   -15.5<dy<15(.5)*/
66
  {0,1,2,3,5,6,7,8},
67
  /*-15.5==dx,          -15.5<dy<15(.5)*/
68
  {1,2,5,7,8},
69
  /*     dx==15(.5),    -15.5<dy<15(.5)*/
70
  {0,1,3,6,7},
71
  /*-15.5==dx==15(.5),  -15.5<dy<15(.5)*/
72
  {-1},
73
  /* -15.5<dx<15(.5),  -15.5==dy*/
74
  {3,5,6,7,8},
75
  /*-15.5==dx,         -15.5==dy*/
76
  {5,7,8},
77
  /*     dx==15(.5),   -15.5==dy*/
78
  {3,6,7},
79
  /*-15.5==dx==15(.5), -15.5==dy*/
80
  {-1},
81
  /* -15.5<dx<15(.5),         dy==15(.5)*/
82
  {0,1,2,3,5},
83
  /*-15.5==dx,                dy==15(.5)*/
84
  {1,2,5},
85
  /*       dx==15(.5),        dy==15(.5)*/
86
  {0,1,3}
87
};
88
89
90
static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
91
337k
 oc_mv _accum,int _mbi,int _frame){
92
337k
  oc_mb_enc_info *embs;
93
337k
  int             accum_x;
94
337k
  int             accum_y;
95
337k
  int             a[3][2];
96
337k
  int             ncandidates;
97
337k
  unsigned        nmbi;
98
337k
  int             i;
99
337k
  embs=_enc->mb_info;
100
  /*Skip a position to store the median predictor in.*/
101
337k
  ncandidates=1;
102
337k
  if(embs[_mbi].ncneighbors>0){
103
    /*Fill in the first part of set A: the vectors from adjacent blocks.*/
104
860k
    for(i=0;i<embs[_mbi].ncneighbors;i++){
105
576k
      nmbi=embs[_mbi].cneighbors[i];
106
576k
      _mcenc->candidates[ncandidates][0]=
107
576k
       OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
108
576k
      _mcenc->candidates[ncandidates][1]=
109
576k
       OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
110
576k
      ncandidates++;
111
576k
    }
112
284k
  }
113
337k
  accum_x=OC_MV_X(_accum);
114
337k
  accum_y=OC_MV_Y(_accum);
115
  /*Add a few additional vectors to set A: the vectors used in the previous
116
     frames and the (0,0) vector.*/
117
337k
  _mcenc->candidates[ncandidates][0]=accum_x;
118
337k
  _mcenc->candidates[ncandidates][1]=accum_y;
119
337k
  ncandidates++;
120
337k
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
121
337k
   OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
122
337k
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
123
337k
   OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
124
337k
  ncandidates++;
125
337k
  _mcenc->candidates[ncandidates][0]=0;
126
337k
  _mcenc->candidates[ncandidates][1]=0;
127
337k
  ncandidates++;
128
  /*Use the first three vectors of set A to find our best predictor: their
129
     median.*/
130
337k
  memcpy(a,_mcenc->candidates+1,sizeof(a));
131
337k
  OC_SORT2I(a[0][0],a[1][0]);
132
337k
  OC_SORT2I(a[0][1],a[1][1]);
133
337k
  OC_SORT2I(a[1][0],a[2][0]);
134
337k
  OC_SORT2I(a[1][1],a[2][1]);
135
337k
  OC_SORT2I(a[0][0],a[1][0]);
136
337k
  OC_SORT2I(a[0][1],a[1][1]);
137
337k
  _mcenc->candidates[0][0]=a[1][0];
138
337k
  _mcenc->candidates[0][1]=a[1][1];
139
337k
  _mcenc->setb0=ncandidates;
140
337k
}
141
142
static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
143
87.5k
 oc_mv _accum,int _mbi,int _frame){
144
87.5k
  oc_mb_enc_info *embs;
145
87.5k
  int             accum_x;
146
87.5k
  int             accum_y;
147
87.5k
  int             ncandidates;
148
87.5k
  embs=_enc->mb_info;
149
87.5k
  accum_x=OC_MV_X(_accum);
150
87.5k
  accum_y=OC_MV_Y(_accum);
151
  /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
152
87.5k
  ncandidates=_mcenc->setb0;
153
  /*Use only the current block. Using more did not appear to be helpful
154
    with the current selection logic due to escaping the local search too
155
    quickly.*/
156
87.5k
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
157
87.5k
   2*OC_MV_X(embs[_mbi].analysis_mv[1][_frame])
158
87.5k
   -OC_MV_X(embs[_mbi].analysis_mv[2][_frame])+accum_x,31);
159
87.5k
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
160
87.5k
   2*OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])
161
87.5k
   -OC_MV_Y(embs[_mbi].analysis_mv[2][_frame])+accum_y,31);
162
87.5k
  ncandidates++;
163
87.5k
  _mcenc->ncandidates=ncandidates;
164
87.5k
}
165
166
/*Computes the SAD of the four luma blocks of a macro block against the
   average of two reference positions (a half-pel predictor).
  _enc:           The encoder context.
  _frag_buf_offs: The buffer offset of each fragment.
  _fragis:        The indices of the four luma fragments of the macro block.
  _mvoffset0:     The offset of the first reference position.
  _mvoffset1:     The offset of the second reference position.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride of both frames.
  _best_err:      The error to beat.
  Return: The accumulated SAD.
          Once the total reaches _best_err the remaining blocks are skipped,
           so a result that is not below _best_err is only a lower bound on
           the true SAD; callers should use it for comparison against
           _best_err only.*/
static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  unsigned err;
  int      bi;
  err=0;
  for(bi=0;bi<4;bi++){
    ptrdiff_t frag_offs;
    /*The remaining budget _best_err-err is unsigned: if err were allowed to
       pass _best_err the subtraction would wrap around to a huge threshold
       and the remaining blocks would be summed in full for nothing.
      This candidate has already lost, so stop here instead.*/
    if(err>=_best_err)break;
    frag_offs=_frag_buf_offs[_fragis[bi]];
    err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
     _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
  }
  return err;
}
181
182
/*Computes the SATD of the four luma blocks of a macro block against the
   average of two reference positions (a half-pel predictor).
  The absolute value of each block's DC term is added to the total.
  _enc:           The encoder context.
  _frag_buf_offs: The buffer offset of each fragment.
  _fragis:        The indices of the four luma fragments of the macro block.
  _mvoffset0:     The offset of the first reference position.
  _mvoffset1:     The offset of the second reference position.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride of both frames.
  _best_err:      Unused; kept so the signature matches oc_sad16_halfpel().
  Return: The accumulated SATD, including the DC contributions.*/
static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  unsigned total;
  int      blk;
  total=0;
  for(blk=0;blk<4;blk++){
    const unsigned char *cur;
    const unsigned char *pred;
    int                  dc;
    cur=_src+_frag_buf_offs[_fragis[blk]];
    pred=_ref+_frag_buf_offs[_fragis[blk]];
    total+=oc_enc_frag_satd2(_enc,&dc,cur,
     pred+_mvoffset0,pred+_mvoffset1,_ystride);
    total+=abs(dc);
  }
  return total;
}
199
200
/*Evaluates a full-pel candidate vector for a whole macro block using SAD.
  _enc:           The encoder context.
  _frag_buf_offs: The buffer offset of each fragment.
  _fragis:        The indices of the four luma fragments of the macro block.
  _dx:            The horizontal displacement, in pixels.
  _dy:            The vertical displacement, in rows.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride of both frames.
  _block_err:     Returns the SAD of each of the four blocks.
  Return: The sum of the four block SADs.*/
static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _block_err[4]){
  unsigned total;
  int      disp;
  int      blk;
  /*The displacement is the same for all four blocks.*/
  disp=_dx+_dy*_ystride;
  total=0;
  for(blk=0;blk<4;blk++){
    ptrdiff_t offs;
    unsigned  sad;
    offs=_frag_buf_offs[_fragis[blk]];
    sad=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+disp,_ystride);
    total+=sad;
    _block_err[blk]=sad;
  }
  return total;
}
220
221
static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
222
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
223
337k
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
224
337k
  int mvoffset;
225
337k
  int err;
226
337k
  int bi;
227
337k
  mvoffset=_dx+_dy*_ystride;
228
337k
  err=0;
229
1.68M
  for(bi=0;bi<4;bi++){
230
1.35M
    ptrdiff_t frag_offs;
231
1.35M
    int       dc;
232
1.35M
    frag_offs=_frag_buf_offs[_fragis[bi]];
233
1.35M
    if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
234
1.35M
      err+=oc_enc_frag_satd(_enc,&dc,
235
1.35M
       _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
236
1.35M
      err+=abs(dc);
237
1.35M
    }
238
0
    else{
239
0
      err+=oc_enc_frag_sad(_enc,
240
0
       _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
241
0
    }
242
1.35M
  }
243
337k
  return err;
244
337k
}
245
246
/*Evaluates a full-pel candidate vector for a single block using SATD.
  _enc:       The encoder context.
  _frag_offs: The buffer offset of the block.
  _dx:        The horizontal displacement, in pixels.
  _dy:        The vertical displacement, in rows.
  _src:       The source frame data.
  _ref:       The reference frame data.
  _ystride:   The stride of both frames.
  Return: The SATD of the block plus the absolute value of its DC term.*/
static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
 ptrdiff_t _frag_offs,int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  const unsigned char *cur;
  const unsigned char *pred;
  unsigned             satd;
  int                  dc;
  cur=_src+_frag_offs;
  pred=_ref+_frag_offs+_dx+_dy*_ystride;
  satd=oc_enc_frag_satd(_enc,&dc,cur,pred,_ystride);
  satd+=abs(dc);
  return satd;
}
255
256
/*Perform a motion vector search for this macro block against a single
257
   reference frame.
258
  As a bonus, individual block motion vectors are computed as well, as much of
259
   the work can be shared.
260
  The actual motion vector is stored in the appropriate place in the
261
   oc_mb_enc_info structure.
262
  _accum:      Drop frame/golden MV accumulators.
263
  _mbi:        The macro block index.
264
  _frame:      The frame to use for SATD calculations and refinement,
265
                either OC_FRAME_PREV or OC_FRAME_GOLD.
266
  _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
267
                OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
268
void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
269
337k
 int _frame_full){
270
  /*Note: Traditionally this search is done using a rate-distortion objective
271
     function of the form D+lambda*R.
272
    However, xiphmont tested this and found it produced a small degradation,
273
     while requiring extra computation.
274
    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
275
     not coded relative to a predictor, and the only truly cheap way to use a
276
     MV is in the LAST or LAST2 MB modes, which are not being considered here.
277
    Therefore if we use the MV found here, it's only because both LAST and
278
     LAST2 performed poorly, and therefore the MB is not likely to be uniform
279
     or suffer from the aperture problem.
280
    Furthermore we would like to reuse the MV found here for as many MBs as
281
     possible, so picking a slightly sub-optimal vector to save a bit or two
282
     may cause increased degradation in many blocks to come.
283
    We could artificially reduce lambda to compensate, but it's faster to just
284
     disable it entirely, and use D (the distortion) as the sole criterion.*/
285
337k
  oc_mcenc_ctx         mcenc;
286
337k
  const ptrdiff_t     *frag_buf_offs;
287
337k
  const ptrdiff_t     *fragis;
288
337k
  const unsigned char *src;
289
337k
  const unsigned char *ref;
290
337k
  const unsigned char *satd_ref;
291
337k
  int                  ystride;
292
337k
  oc_mb_enc_info      *embs;
293
337k
  ogg_int32_t          hit_cache[31];
294
337k
  ogg_int32_t          hitbit;
295
337k
  unsigned             best_block_err[4];
296
337k
  unsigned             block_err[4];
297
337k
  unsigned             best_err;
298
337k
  int                  best_vec[2];
299
337k
  int                  best_block_vec[4][2];
300
337k
  int                  candx;
301
337k
  int                  candy;
302
337k
  int                  bi;
303
337k
  embs=_enc->mb_info;
304
  /*Find some candidate motion vectors.*/
305
337k
  oc_mcenc_find_candidates_a(_enc,&mcenc,_accum,_mbi,_frame);
306
  /*Clear the cache of locations we've examined.*/
307
337k
  memset(hit_cache,0,sizeof(hit_cache));
308
  /*Start with the median predictor.*/
309
337k
  candx=OC_DIV2(mcenc.candidates[0][0]);
310
337k
  candy=OC_DIV2(mcenc.candidates[0][1]);
311
337k
  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
312
337k
  frag_buf_offs=_enc->state.frag_buf_offs;
313
337k
  fragis=_enc->state.mb_maps[_mbi][0];
314
337k
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
315
337k
  ref=_enc->state.ref_frame_data[_frame_full];
316
337k
  satd_ref=_enc->state.ref_frame_data[_frame];
317
337k
  ystride=_enc->state.ref_ystride[0];
318
  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
319
337k
  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
320
337k
   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
321
337k
  best_vec[0]=candx;
322
337k
  best_vec[1]=candy;
323
337k
  if(_frame==OC_FRAME_PREV){
324
844k
    for(bi=0;bi<4;bi++){
325
675k
      best_block_err[bi]=block_err[bi];
326
675k
      best_block_vec[bi][0]=candx;
327
675k
      best_block_vec[bi][1]=candy;
328
675k
    }
329
168k
  }
330
  /*If this predictor fails, move on to set A.*/
331
337k
  if(best_err>OC_YSAD_THRESH1){
332
306k
    unsigned err;
333
306k
    unsigned t2;
334
306k
    int      ncs;
335
306k
    int      ci;
336
    /*Compute the early termination threshold for set A.*/
337
306k
    t2=embs[_mbi].error[_frame];
338
306k
    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
339
818k
    for(ci=0;ci<ncs;ci++){
340
512k
      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
341
512k
    }
342
306k
    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
343
    /*Examine the candidates in set A.*/
344
1.75M
    for(ci=1;ci<mcenc.setb0;ci++){
345
1.45M
      candx=OC_DIV2(mcenc.candidates[ci][0]);
346
1.45M
      candy=OC_DIV2(mcenc.candidates[ci][1]);
347
      /*If we've already examined this vector, then we would be using it if it
348
         was better than what we are using.*/
349
1.45M
      hitbit=(ogg_int32_t)1<<candx+15;
350
1.45M
      if(hit_cache[candy+15]&hitbit)continue;
351
598k
      hit_cache[candy+15]|=hitbit;
352
598k
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
353
598k
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
354
598k
      if(err<best_err){
355
218k
        best_err=err;
356
218k
        best_vec[0]=candx;
357
218k
        best_vec[1]=candy;
358
218k
      }
359
598k
      if(_frame==OC_FRAME_PREV){
360
1.30M
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
361
392k
          best_block_err[bi]=block_err[bi];
362
392k
          best_block_vec[bi][0]=candx;
363
392k
          best_block_vec[bi][1]=candy;
364
392k
        }
365
261k
      }
366
598k
    }
367
306k
    if(best_err>t2){
368
87.5k
      oc_mcenc_find_candidates_b(_enc,&mcenc,_accum,_mbi,_frame);
369
      /*Examine the candidates in set B.*/
370
175k
      for(;ci<mcenc.ncandidates;ci++){
371
87.5k
        candx=OC_DIV2(mcenc.candidates[ci][0]);
372
87.5k
        candy=OC_DIV2(mcenc.candidates[ci][1]);
373
87.5k
        hitbit=(ogg_int32_t)1<<candx+15;
374
87.5k
        if(hit_cache[candy+15]&hitbit)continue;
375
37.4k
        hit_cache[candy+15]|=hitbit;
376
37.4k
        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
377
37.4k
         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
378
37.4k
        if(err<best_err){
379
11.5k
          best_err=err;
380
11.5k
          best_vec[0]=candx;
381
11.5k
          best_vec[1]=candy;
382
11.5k
        }
383
37.4k
        if(_frame==OC_FRAME_PREV){
384
100k
          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
385
24.8k
            best_block_err[bi]=block_err[bi];
386
24.8k
            best_block_vec[bi][0]=candx;
387
24.8k
            best_block_vec[bi][1]=candy;
388
24.8k
          }
389
20.0k
        }
390
37.4k
      }
391
      /*Use the same threshold for set B as in set A.*/
392
87.5k
      if(best_err>t2){
393
84.1k
        int best_site;
394
84.1k
        int nsites;
395
84.1k
        int sitei;
396
84.1k
        int site;
397
84.1k
        int b;
398
        /*Square pattern search.*/
399
400k
        for(;;){
400
400k
          best_site=4;
401
          /*Compose the bit flags for boundary conditions.*/
402
400k
          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
403
400k
           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
404
400k
          nsites=OC_SQUARE_NSITES[b];
405
3.39M
          for(sitei=0;sitei<nsites;sitei++){
406
2.99M
            site=OC_SQUARE_SITES[b][sitei];
407
2.99M
            candx=best_vec[0]+OC_SQUARE_DX[site];
408
2.99M
            candy=best_vec[1]+OC_SQUARE_DY[site];
409
2.99M
            hitbit=(ogg_int32_t)1<<candx+15;
410
2.99M
            if(hit_cache[candy+15]&hitbit)continue;
411
1.69M
            hit_cache[candy+15]|=hitbit;
412
1.69M
            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
413
1.69M
             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
414
1.69M
            if(err<best_err){
415
499k
              best_err=err;
416
499k
              best_site=site;
417
499k
            }
418
1.69M
            if(_frame==OC_FRAME_PREV){
419
4.64M
              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
420
617k
                best_block_err[bi]=block_err[bi];
421
617k
                best_block_vec[bi][0]=candx;
422
617k
                best_block_vec[bi][1]=candy;
423
617k
              }
424
928k
            }
425
1.69M
          }
426
400k
          if(best_site==4)break;
427
316k
          best_vec[0]+=OC_SQUARE_DX[best_site];
428
316k
          best_vec[1]+=OC_SQUARE_DY[best_site];
429
316k
        }
430
        /*Final 4-MV search.*/
431
        /*Simply use 1/4 of the macro block set A and B threshold as the
432
           individual block threshold.*/
433
84.1k
        if(_frame==OC_FRAME_PREV){
434
45.1k
          t2>>=2;
435
225k
          for(bi=0;bi<4;bi++){
436
180k
            if(best_block_err[bi]>t2){
437
              /*Square pattern search.
438
                We do this in a slightly interesting manner.
439
                We continue to check the SAD of all four blocks in the
440
                 macro block.
441
                This gives us two things:
442
                 1) We can continue to use the hit_cache to avoid duplicate
443
                     checks.
444
                    Otherwise we could continue to read it, but not write to it
445
                     without saving and restoring it for each block.
446
                    Note that we could still eliminate a large number of
447
                     duplicate checks by taking into account the site we came
448
                     from when choosing the site list.
449
                    We can still do that to avoid extra hit_cache queries, and
450
                     it might even be a speed win.
451
                 2) It gives us a slightly better chance of escaping local
452
                     minima.
453
                    We would not be here if we weren't doing a fairly bad job
454
                     in finding a good vector, and checking these vectors can
455
                     save us from 100 to several thousand points off our SAD 1
456
                     in 15 times.
457
                TODO: Is this a good idea?
458
                Who knows.
459
                It needs more testing.*/
460
155k
              for(;;){
461
155k
                int bestx;
462
155k
                int besty;
463
155k
                int bj;
464
155k
                bestx=best_block_vec[bi][0];
465
155k
                besty=best_block_vec[bi][1];
466
                /*Compose the bit flags for boundary conditions.*/
467
155k
                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
468
155k
                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
469
155k
                nsites=OC_SQUARE_NSITES[b];
470
1.32M
                for(sitei=0;sitei<nsites;sitei++){
471
1.17M
                  site=OC_SQUARE_SITES[b][sitei];
472
1.17M
                  candx=bestx+OC_SQUARE_DX[site];
473
1.17M
                  candy=besty+OC_SQUARE_DY[site];
474
1.17M
                  hitbit=(ogg_int32_t)1<<candx+15;
475
1.17M
                  if(hit_cache[candy+15]&hitbit)continue;
476
449k
                  hit_cache[candy+15]|=hitbit;
477
449k
                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
478
449k
                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
479
449k
                  if(err<best_err){
480
36.9k
                    best_err=err;
481
36.9k
                    best_vec[0]=candx;
482
36.9k
                    best_vec[1]=candy;
483
36.9k
                  }
484
2.24M
                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
485
189k
                    best_block_err[bj]=block_err[bj];
486
189k
                    best_block_vec[bj][0]=candx;
487
189k
                    best_block_vec[bj][1]=candy;
488
189k
                  }
489
449k
                }
490
155k
                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
491
72.3k
                  break;
492
72.3k
                }
493
155k
              }
494
72.3k
            }
495
180k
          }
496
45.1k
        }
497
84.1k
      }
498
87.5k
    }
499
306k
  }
500
337k
  embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
501
337k
  candx=best_vec[0];
502
337k
  candy=best_vec[1];
503
337k
  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
504
337k
   frag_buf_offs,fragis,candx,candy,src,satd_ref,ystride);
505
337k
  embs[_mbi].analysis_mv[0][_frame]=OC_MV(candx<<1,candy<<1);
506
337k
  if(_frame==OC_FRAME_PREV&&_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
507
844k
    for(bi=0;bi<4;bi++){
508
675k
      candx=best_block_vec[bi][0];
509
675k
      candy=best_block_vec[bi][1];
510
675k
      embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
511
675k
       frag_buf_offs[fragis[bi]],candx,candy,src,satd_ref,ystride);
512
675k
      embs[_mbi].block_mv[bi]=OC_MV(candx<<1,candy<<1);
513
675k
    }
514
168k
  }
515
337k
}
516
517
168k
/*Runs the motion search for one macro block against both the previous frame
   and the golden frame.
  Before searching, the macro block's MV history (analysis_mv) is shifted back
   by one frame.
  _enc: The encoder context.
  _mbi: The macro block index.*/
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
  oc_mv2 *hist;
  oc_mv   prev_accum;
  oc_mv   gold_accum;
  oc_mv   oldest_prev;
  hist=_enc->mb_info[_mbi].analysis_mv;
  /*The PREV accumulator is only non-zero when the previous frame was
     dropped.*/
  prev_accum=_enc->prevframe_dropped?hist[0][OC_FRAME_PREV]:0;
  gold_accum=hist[2][OC_FRAME_GOLD];
  /*Move the motion vector predictors back a frame.*/
  oldest_prev=hist[2][OC_FRAME_PREV];
  hist[2][OC_FRAME_GOLD]=hist[1][OC_FRAME_GOLD];
  hist[2][OC_FRAME_PREV]=hist[1][OC_FRAME_PREV];
  hist[1][OC_FRAME_GOLD]=hist[0][OC_FRAME_GOLD];
  hist[1][OC_FRAME_PREV]=OC_MV_SUB(hist[0][OC_FRAME_PREV],oldest_prev);
  /*Search the last frame.*/
  oc_mcenc_search_frame(_enc,prev_accum,_mbi,OC_FRAME_PREV,OC_FRAME_PREV_ORIG);
  hist[2][OC_FRAME_PREV]=prev_accum;
  /*GOLDEN MVs are different from PREV MVs in that they're each absolute
     offsets from some frame in the past rather than relative offsets from the
     frame before.
    For predictor calculation to make sense, we need them to be in the same
     form as PREV MVs.*/
  hist[1][OC_FRAME_GOLD]=
   OC_MV_SUB(hist[1][OC_FRAME_GOLD],hist[2][OC_FRAME_GOLD]);
  hist[2][OC_FRAME_GOLD]=OC_MV_SUB(hist[2][OC_FRAME_GOLD],gold_accum);
  /*Search the golden frame.*/
  oc_mcenc_search_frame(_enc,gold_accum,_mbi,OC_FRAME_GOLD,OC_FRAME_GOLD_ORIG);
  /*Put GOLDEN MVs back into absolute offset form.
    The newest MV is already an absolute offset.*/
  hist[2][OC_FRAME_GOLD]=OC_MV_ADD(hist[2][OC_FRAME_GOLD],gold_accum);
  hist[1][OC_FRAME_GOLD]=
   OC_MV_ADD(hist[1][OC_FRAME_GOLD],hist[2][OC_FRAME_GOLD]);
}
549
550
#if 0
551
static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
552
 int _vec[2],int _best_err,int _frame){
553
  const unsigned char *src;
554
  const unsigned char *ref;
555
  const ptrdiff_t     *frag_buf_offs;
556
  const ptrdiff_t     *fragis;
557
  int                  offset_y[9];
558
  int                  ystride;
559
  int                  mvoffset_base;
560
  int                  best_site;
561
  int                  sitei;
562
  int                  err;
563
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
564
  ref=_enc->state.ref_frame_data[_framei];
565
  frag_buf_offs=_enc->state.frag_buf_offs;
566
  fragis=_enc->state.mb_maps[_mbi][0];
567
  ystride=_enc->state.ref_ystride[0];
568
  mvoffset_base=_vec[0]+_vec[1]*ystride;
569
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
570
  offset_y[3]=offset_y[5]=0;
571
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
572
  best_site=4;
573
  for(sitei=0;sitei<8;sitei++){
574
    int site;
575
    int xmask;
576
    int ymask;
577
    int dx;
578
    int dy;
579
    int mvoffset0;
580
    int mvoffset1;
581
    site=OC_SQUARE_SITES[0][sitei];
582
    dx=OC_SQUARE_DX[site];
583
    dy=OC_SQUARE_DY[site];
584
    /*The following code SHOULD be equivalent to
585
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
586
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
587
      However, it should also be much faster, as it involves no multiplies and
588
       doesn't have to handle chroma vectors.*/
589
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
590
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
591
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
592
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
593
    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
594
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
595
    if(err<_best_err){
596
      _best_err=err;
597
      best_site=site;
598
    }
599
  }
600
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
601
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
602
  return _best_err;
603
}
604
#endif
605
606
static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
607
216k
 int _mbi,int _vec[2],unsigned _best_err,int _frame){
608
216k
  const unsigned char *src;
609
216k
  const unsigned char *ref;
610
216k
  const ptrdiff_t     *frag_buf_offs;
611
216k
  const ptrdiff_t     *fragis;
612
216k
  int                  offset_y[9];
613
216k
  int                  ystride;
614
216k
  int                  mvoffset_base;
615
216k
  int                  best_site;
616
216k
  int                  sitei;
617
216k
  int                  err;
618
216k
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
619
216k
  ref=_enc->state.ref_frame_data[_frame];
620
216k
  frag_buf_offs=_enc->state.frag_buf_offs;
621
216k
  fragis=_enc->state.mb_maps[_mbi][0];
622
216k
  ystride=_enc->state.ref_ystride[0];
623
216k
  mvoffset_base=_vec[0]+_vec[1]*ystride;
624
216k
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
625
216k
  offset_y[3]=offset_y[5]=0;
626
216k
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
627
216k
  best_site=4;
628
1.94M
  for(sitei=0;sitei<8;sitei++){
629
1.72M
    int site;
630
1.72M
    int xmask;
631
1.72M
    int ymask;
632
1.72M
    int dx;
633
1.72M
    int dy;
634
1.72M
    int mvoffset0;
635
1.72M
    int mvoffset1;
636
1.72M
    site=OC_SQUARE_SITES[0][sitei];
637
1.72M
    dx=OC_SQUARE_DX[site];
638
1.72M
    dy=OC_SQUARE_DY[site];
639
    /*The following code SHOULD be equivalent to
640
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
641
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
642
      However, it should also be much faster, as it involves no multiplies and
643
       doesn't have to handle chroma vectors.*/
644
1.72M
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
645
1.72M
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
646
1.72M
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
647
1.72M
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
648
1.72M
    if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
649
1.72M
      err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
650
1.72M
       mvoffset0,mvoffset1,src,ref,ystride,_best_err);
651
1.72M
    }
652
0
    else{
653
0
      err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
654
0
           mvoffset0,mvoffset1,src,ref,ystride,_best_err);
655
0
    }
656
1.72M
    if(err<_best_err){
657
432k
      _best_err=err;
658
432k
      best_site=site;
659
432k
    }
660
1.72M
  }
661
216k
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
662
216k
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
663
216k
  return _best_err;
664
216k
}
665
666
216k
/*Refines the single macro block motion vector found by the full-pel search
   to half-pel precision.
  Both the stored vector (analysis_mv[0][_frame]) and its SATD
   (satd[_frame]) are updated in place.
  _enc:   The encoder context.
  _mbi:   The macro block index.
  _frame: The reference frame the vector refers to.*/
void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
  oc_mb_enc_info *mb;
  int             mv[2];
  mb=_enc->mb_info+_mbi;
  /*Go back to full-pel units; the refinement returns half-pel units.*/
  mv[0]=OC_DIV2(OC_MV_X(mb->analysis_mv[0][_frame]));
  mv[1]=OC_DIV2(OC_MV_Y(mb->analysis_mv[0][_frame]));
  mb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
   _mbi,mv,mb->satd[_frame],_frame);
  mb->analysis_mv[0][_frame]=OC_MV(mv[0],mv[1]);
}
676
677
#if 0
678
static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
679
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
680
 int _offset_y[9],unsigned _best_err){
681
  int mvoffset_base;
682
  int best_site;
683
  int sitei;
684
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
685
  best_site=4;
686
  for(sitei=0;sitei<8;sitei++){
687
    unsigned err;
688
    int      site;
689
    int      xmask;
690
    int      ymask;
691
    int      dx;
692
    int      dy;
693
    int      mvoffset0;
694
    int      mvoffset1;
695
    site=OC_SQUARE_SITES[0][sitei];
696
    dx=OC_SQUARE_DX[site];
697
    dy=OC_SQUARE_DY[site];
698
    /*The following code SHOULD be equivalent to
699
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
700
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
701
      However, it should also be much faster, as it involves no multiplies and
702
       doesn't have to handle chroma vectors.*/
703
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
704
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
705
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
706
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
707
    err=oc_enc_frag_sad2_thresh(_enc,_src,
708
     _ref+mvoffset0,_ref+mvoffset1,ystride,_best_err);
709
    if(err<_best_err){
710
      _best_err=err;
711
      best_site=site;
712
    }
713
  }
714
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
715
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
716
  return _best_err;
717
}
718
#endif
719
720
static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
721
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
722
108k
 int _offset_y[9],unsigned _best_err){
723
108k
  int mvoffset_base;
724
108k
  int best_site;
725
108k
  int sitei;
726
108k
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
727
108k
  best_site=4;
728
978k
  for(sitei=0;sitei<8;sitei++){
729
869k
    unsigned err;
730
869k
    int      dc;
731
869k
    int      site;
732
869k
    int      xmask;
733
869k
    int      ymask;
734
869k
    int      dx;
735
869k
    int      dy;
736
869k
    int      mvoffset0;
737
869k
    int      mvoffset1;
738
869k
    site=OC_SQUARE_SITES[0][sitei];
739
869k
    dx=OC_SQUARE_DX[site];
740
869k
    dy=OC_SQUARE_DY[site];
741
    /*The following code SHOULD be equivalent to
742
        oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
743
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
744
      However, it should also be much faster, as it involves no multiplies and
745
       doesn't have to handle chroma vectors.*/
746
869k
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
747
869k
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
748
869k
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
749
869k
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
750
869k
    err=oc_enc_frag_satd2(_enc,&dc,_src,
751
869k
     _ref+mvoffset0,_ref+mvoffset1,_ystride);
752
869k
    err+=abs(dc);
753
869k
    if(err<_best_err){
754
227k
      _best_err=err;
755
227k
      best_site=site;
756
227k
    }
757
869k
  }
758
108k
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
759
108k
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
760
108k
  return _best_err;
761
108k
}
762
763
27.1k
/*Refines the four per-block motion vectors of a macro block to half-pel
   precision against OC_FRAME_PREV.
  For each block, block_mv is read, block_satd is updated with the refined
   error, and the refined vector is written to ref_mv.
  _enc: The encoder context.
  _mbi: The macro block index.*/
void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
  oc_mb_enc_info      *mb;
  const ptrdiff_t     *offs_tab;
  const ptrdiff_t     *mb_frags;
  const unsigned char *cur_frame;
  const unsigned char *prev_frame;
  int                  row_step[9];
  int                  stride;
  int                  blk;
  mb=_enc->mb_info+_mbi;
  offs_tab=_enc->state.frag_buf_offs;
  mb_frags=_enc->state.mb_maps[_mbi][0];
  cur_frame=_enc->state.ref_frame_data[OC_FRAME_IO];
  prev_frame=_enc->state.ref_frame_data[OC_FRAME_PREV];
  stride=_enc->state.ref_ystride[0];
  /*Row offsets for the sites of the square pattern: one row up, same row,
     one row down.
    Entry 4 (the center site) is left unset, as in the original table
     setup.*/
  row_step[0]=row_step[1]=row_step[2]=-stride;
  row_step[3]=row_step[5]=0;
  row_step[6]=row_step[7]=row_step[8]=stride;
  for(blk=0;blk<4;blk++){
    ptrdiff_t offs;
    int       mv[2];
    offs=offs_tab[mb_frags[blk]];
    /*Go back to full-pel units; the refinement returns half-pel units.*/
    mv[0]=OC_DIV2(OC_MV_X(mb->block_mv[blk]));
    mv[1]=OC_DIV2(OC_MV_Y(mb->block_mv[blk]));
    mb->block_satd[blk]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
     cur_frame+offs,prev_frame+offs,stride,row_step,mb->block_satd[blk]);
    mb->ref_mv[blk]=OC_MV(mv[0],mv[1]);
  }
}