Coverage Report

Created: 2026-05-16 07:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/contrib/contrib-build/libtheora/lib/decode.c
Line
Count
Source
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009,2025           *
9
 * by the Xiph.Org Foundation and contributors                      *
10
 * https://www.xiph.org/                                            *
11
 *                                                                  *
12
 ********************************************************************
13
14
  function:
15
16
 ********************************************************************/
17
18
#include <stdlib.h>
19
#include <string.h>
20
#include <ogg/ogg.h>
21
#include "decint.h"
22
#if defined(OC_DUMP_IMAGES)
23
# include <stdio.h>
24
# include "png.h"
25
#endif
26
#if defined(HAVE_CAIRO)
27
# include <cairo.h>
28
#endif
29
30
31
/*Post-processing level constants.*/
/*Post-processing is switched off.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Only record the DC qi of each block.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Luma plane deblocking.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Luma plane deringing.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Luma plane deringing, stronger variant.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Chroma plane deblocking.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Chroma plane deringing.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Chroma plane deringing, stronger variant.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*The largest post-processing level that is valid.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
52
/*The mode alphabets for the various mode coding schemes.
53
  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
54
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55
  /*Last MV dominates */
56
  {
57
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59
    OC_MODE_INTER_MV_FOUR
60
  },
61
  {
62
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64
    OC_MODE_INTER_MV_FOUR
65
  },
66
  {
67
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69
    OC_MODE_INTER_MV_FOUR
70
  },
71
  {
72
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75
  },
76
  /*No MV dominates.*/
77
  {
78
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80
    OC_MODE_INTER_MV_FOUR
81
  },
82
  {
83
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85
    OC_MODE_INTER_MV_FOUR
86
  },
87
  /*Default ordering.*/
88
  {
89
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91
    OC_MODE_INTER_MV_FOUR
92
  }
93
};
94
95
96
/*The original DCT tokens are extended and reordered during the construction of
97
   the Huffman tables.
98
  The extension means more bits can be read with fewer calls to the bitpacker
99
   during the Huffman decoding process (at the cost of larger Huffman tables),
100
   and fewer tokens require additional extra bits (reducing the average storage
101
   per decoded token).
102
  The revised ordering reveals essential information in the token value
103
   itself; specifically, whether or not there are additional extra bits to read
104
   and the parameter to which those extra bits are applied.
105
  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106
  The extra bits are added into code word at the bit position inferred from the
107
   token value, giving the final code word from which all required parameters
108
   are derived.
109
  The number of EOBs and the leading zero run length can be extracted directly.
110
  The coefficient magnitude is optionally negated before extraction, according
111
   to a 'flip' bit.*/
112
113
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only tokens that need additional extra bits have an entry in this table.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token value is a valid index into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that an expression built from operators
   that bind less tightly than '<' (e.g., a&b or c?x:y) is compared as a
   whole.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 ((token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123
124
/*The one token (OC_DCT_REPEAT_RUN3_TOKEN) that needs more than 8 extra
   bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The EOB count used for an end-of-frame token.
  Note: PTRDIFF_MAX is the value we really want, but it requires C99, which is
   not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)

/*Code word layout.*/
/*Position of the (6) run length bits.
  They sit at bit 0 and are given 8 bits (6 would suffice) because extracting
   the low byte may be faster on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*Position of the (12) EOB bits.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*Position of the (1) flip bit.
  It must sit directly below the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*Position of the (11) token magnitude bits.
  They must come last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Build a code word from its fields.
  The magnitude is multiplied rather than shifted into place, and has the flip
   bit subtracted from it first.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 (((_eobs)<<OC_DCT_CW_EOB_SHIFT)| \
 ((_rlen)<<OC_DCT_CW_RLEN_SHIFT)| \
 ((_flip)<<OC_DCT_CW_FLIP_BIT)| \
 (((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT)))

/*The special code word that marks the end of the frame (a long EOB run of
   zero).*/
#define OC_DCT_CW_FINISH (0)

/*The bit position at which a token's extra bits are inserted into its code
   word: OC_DCT_CW_EOB_SHIFT for tokens below 2, OC_DCT_CW_MAG_SHIFT for tokens
   below 12, and 0 otherwise.
  It is written without branches because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  It assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if you
   want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
168
/*The code words for each internal token.
169
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170
   order.*/
171
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172
  /*These tokens require additional extra bits for the EOB count.*/
173
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174
  OC_DCT_CW_FINISH,
175
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176
  OC_DCT_CW_PACK(16, 0,  0,0),
177
  /*These tokens require additional extra bits for the magnitude.*/
178
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179
  OC_DCT_CW_PACK( 0, 0, 13,0),
180
  OC_DCT_CW_PACK( 0, 0, 13,1),
181
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182
  OC_DCT_CW_PACK( 0, 0, 21,0),
183
  OC_DCT_CW_PACK( 0, 0, 21,1),
184
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185
  OC_DCT_CW_PACK( 0, 0, 37,0),
186
  OC_DCT_CW_PACK( 0, 0, 37,1),
187
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188
  OC_DCT_CW_PACK( 0, 0, 69,0),
189
  OC_DCT_CW_PACK( 0, 0,325,0),
190
  OC_DCT_CW_PACK( 0, 0, 69,1),
191
  OC_DCT_CW_PACK( 0, 0,325,1),
192
  /*These tokens require additional extra bits for the run length.*/
193
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194
  OC_DCT_CW_PACK( 0,10, +1,0),
195
  OC_DCT_CW_PACK( 0,10, -1,0),
196
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
197
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198
  OC_DCT_CW_PACK( 0, 0,  0,1),
199
  /*The remaining tokens require no additional extra bits.*/
200
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201
  OC_DCT_CW_PACK( 1, 0,  0,0),
202
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203
  OC_DCT_CW_PACK( 2, 0,  0,0),
204
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205
  OC_DCT_CW_PACK( 3, 0,  0,0),
206
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207
  OC_DCT_CW_PACK( 0, 1, +1,0),
208
  OC_DCT_CW_PACK( 0, 1, -1,0),
209
  OC_DCT_CW_PACK( 0, 2, +1,0),
210
  OC_DCT_CW_PACK( 0, 2, -1,0),
211
  OC_DCT_CW_PACK( 0, 3, +1,0),
212
  OC_DCT_CW_PACK( 0, 3, -1,0),
213
  OC_DCT_CW_PACK( 0, 4, +1,0),
214
  OC_DCT_CW_PACK( 0, 4, -1,0),
215
  OC_DCT_CW_PACK( 0, 5, +1,0),
216
  OC_DCT_CW_PACK( 0, 5, -1,0),
217
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218
  OC_DCT_CW_PACK( 0, 1, +2,0),
219
  OC_DCT_CW_PACK( 0, 1, +3,0),
220
  OC_DCT_CW_PACK( 0, 1, -2,0),
221
  OC_DCT_CW_PACK( 0, 1, -3,0),
222
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223
  OC_DCT_CW_PACK( 0, 6, +1,0),
224
  OC_DCT_CW_PACK( 0, 7, +1,0),
225
  OC_DCT_CW_PACK( 0, 8, +1,0),
226
  OC_DCT_CW_PACK( 0, 9, +1,0),
227
  OC_DCT_CW_PACK( 0, 6, -1,0),
228
  OC_DCT_CW_PACK( 0, 7, -1,0),
229
  OC_DCT_CW_PACK( 0, 8, -1,0),
230
  OC_DCT_CW_PACK( 0, 9, -1,0),
231
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232
  OC_DCT_CW_PACK( 0, 2, +2,0),
233
  OC_DCT_CW_PACK( 0, 3, +2,0),
234
  OC_DCT_CW_PACK( 0, 2, +3,0),
235
  OC_DCT_CW_PACK( 0, 3, +3,0),
236
  OC_DCT_CW_PACK( 0, 2, -2,0),
237
  OC_DCT_CW_PACK( 0, 3, -2,0),
238
  OC_DCT_CW_PACK( 0, 2, -3,0),
239
  OC_DCT_CW_PACK( 0, 3, -3,0),
240
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242
  OC_DCT_CW_PACK( 0, 0,  0,1),
243
  OC_DCT_CW_PACK( 0, 1,  0,0),
244
  OC_DCT_CW_PACK( 0, 2,  0,0),
245
  OC_DCT_CW_PACK( 0, 3,  0,0),
246
  OC_DCT_CW_PACK( 0, 4,  0,0),
247
  OC_DCT_CW_PACK( 0, 5,  0,0),
248
  OC_DCT_CW_PACK( 0, 6,  0,0),
249
  OC_DCT_CW_PACK( 0, 7,  0,0),
250
  /*OC_ONE_TOKEN (0 extra bits)*/
251
  OC_DCT_CW_PACK( 0, 0, +1,0),
252
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253
  OC_DCT_CW_PACK( 0, 0, -1,0),
254
  /*OC_TWO_TOKEN (0 extra bits)*/
255
  OC_DCT_CW_PACK( 0, 0, +2,0),
256
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257
  OC_DCT_CW_PACK( 0, 0, -2,0),
258
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259
  OC_DCT_CW_PACK( 0, 0, +3,0),
260
  OC_DCT_CW_PACK( 0, 0, -3,0),
261
  OC_DCT_CW_PACK( 0, 0, +4,0),
262
  OC_DCT_CW_PACK( 0, 0, -4,0),
263
  OC_DCT_CW_PACK( 0, 0, +5,0),
264
  OC_DCT_CW_PACK( 0, 0, -5,0),
265
  OC_DCT_CW_PACK( 0, 0, +6,0),
266
  OC_DCT_CW_PACK( 0, 0, -6,0),
267
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268
  OC_DCT_CW_PACK( 0, 0, +7,0),
269
  OC_DCT_CW_PACK( 0, 0, +8,0),
270
  OC_DCT_CW_PACK( 0, 0, -7,0),
271
  OC_DCT_CW_PACK( 0, 0, -8,0),
272
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273
  OC_DCT_CW_PACK( 0, 0, +9,0),
274
  OC_DCT_CW_PACK( 0, 0,+10,0),
275
  OC_DCT_CW_PACK( 0, 0,+11,0),
276
  OC_DCT_CW_PACK( 0, 0,+12,0),
277
  OC_DCT_CW_PACK( 0, 0, -9,0),
278
  OC_DCT_CW_PACK( 0, 0,-10,0),
279
  OC_DCT_CW_PACK( 0, 0,-11,0),
280
  OC_DCT_CW_PACK( 0, 0,-12,0),
281
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282
  OC_DCT_CW_PACK( 8, 0,  0,0),
283
  OC_DCT_CW_PACK( 9, 0,  0,0),
284
  OC_DCT_CW_PACK(10, 0,  0,0),
285
  OC_DCT_CW_PACK(11, 0,  0,0),
286
  OC_DCT_CW_PACK(12, 0,  0,0),
287
  OC_DCT_CW_PACK(13, 0,  0,0),
288
  OC_DCT_CW_PACK(14, 0,  0,0),
289
  OC_DCT_CW_PACK(15, 0,  0,0),
290
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291
  OC_DCT_CW_PACK( 4, 0,  0,0),
292
  OC_DCT_CW_PACK( 5, 0,  0,0),
293
  OC_DCT_CW_PACK( 6, 0,  0,0),
294
  OC_DCT_CW_PACK( 7, 0,  0,0),
295
};
296
297
298
299
718k
/*Reads one super block run length from the packet.
  _opb: The pack buffer to read from.
  Return: The decoded run length.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
      2,
       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int base;
  int nbits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Tokens below 0x10 are the run length itself.*/
  if(token<0x10)return token;
  /*Otherwise the low 5 bits hold the run base (less 6), and the bits above
     them hold the number of extra bits to read.*/
  base=token&0x1F;
  nbits=(token-base)>>4;
  return 6+base+(int)oc_pack_read(_opb,nbits);
}
327
328
1.60M
/*Reads one block run length from the packet.
  _opb: The pack buffer to read from.
  Return: The value decoded by oc_huff_token_decode() from the tree below.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
     33,       36,       39,       44,
      1,-(1<<8|7),-(1<<8|8),
      1,-(1<<8|9),-(1<<8|10),
      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
      4,
       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  int run_length;
  run_length=oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
  return run_length;
}
358
359
360
361
81
/*Selects the C implementations of the decoder's accelerated functions.
  When OC_DEC_USE_VTABLE is defined, this stores the C DC unprediction routine
   in the decoder's vtable; otherwise it does nothing.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
#if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c;
#endif
}
367
368
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
369
81
 const th_setup_info *_setup){
370
81
  int qti;
371
81
  int pli;
372
81
  int qi;
373
81
  int ret;
374
81
  ret=oc_state_init(&_dec->state,_info,3);
375
81
  if(ret<0)return ret;
376
81
  ret=oc_huff_trees_copy(_dec->huff_tables,
377
81
   (const ogg_int16_t *const *)_setup->huff_tables);
378
81
  if(ret<0){
379
0
    oc_state_clear(&_dec->state);
380
0
    return ret;
381
0
  }
382
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
383
     one byte for extra-bits for each token, plus one more byte for the long
384
     EOB run, just in case it's the very last token and has a run length of
385
     one.*/
386
81
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
387
81
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
388
81
  if(_dec->dct_tokens==NULL){
389
0
    oc_huff_trees_clear(_dec->huff_tables);
390
0
    oc_state_clear(&_dec->state);
391
0
    return TH_EFAULT;
392
0
  }
393
46.6k
  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
394
31.1k
    _dec->state.dequant_tables[qi][pli][qti]=
395
31.1k
     _dec->state.dequant_table_data[qi][pli][qti];
396
31.1k
  }
397
81
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
398
81
   &_setup->qinfo);
399
5.26k
  for(qi=0;qi<64;qi++){
400
5.18k
    int qsum;
401
5.18k
    qsum=0;
402
41.4k
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
403
31.1k
      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
404
31.1k
       _dec->state.dequant_tables[qi][pli][qti][17]+
405
31.1k
       _dec->state.dequant_tables[qi][pli][qti][18]+
406
31.1k
       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
407
31.1k
    }
408
5.18k
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
409
5.18k
  }
410
81
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
411
81
   sizeof(_dec->state.loop_filter_limits));
412
81
  oc_dec_accel_init(_dec);
413
81
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
414
81
  _dec->dc_qis=NULL;
415
81
  _dec->variances=NULL;
416
81
  _dec->pp_frame_data=NULL;
417
81
  _dec->stripe_cb.ctx=NULL;
418
81
  _dec->stripe_cb.stripe_decoded=NULL;
419
#if defined(HAVE_CAIRO)
420
  _dec->telemetry_bits=0;
421
  _dec->telemetry_qi=0;
422
  _dec->telemetry_mbmode=0;
423
  _dec->telemetry_mv=0;
424
  _dec->telemetry_frame_data=NULL;
425
#endif
426
81
  return 0;
427
81
}
428
429
81
/*Releases everything owned by a decoder context: the optional telemetry and
   post-processing buffers, the token buffer, the Huffman trees, and the
   shared codec state.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  /*Post-processing buffers.*/
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  /*Buffers set up by oc_dec_init().*/
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
440
441
442
8.02k
/*Reads the frame header from the decoder's pack buffer.
  This stores the frame type and the list of up to three qi values in the
   codec state.
  _dec: The decoder context.
  Return: 0 on success, TH_EBADPACKET if the packet is not a data packet, or
           TH_EIMPL if the unused configuration bits of a keyframe are not all
           zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  oc_pack_buf *opb;
  long         val;
  int          nqis;
  opb=&_dec->opb;
  /*A data packet starts with a zero bit.*/
  val=oc_pack_read1(opb);
  if(val!=0)return TH_EBADPACKET;
  /*The frame type (I or P).*/
  val=oc_pack_read1(opb);
  _dec->state.frame_type=(int)val;
  /*The qi list: a 6-bit qi, then (for the first two entries only) a flag bit
     saying whether another qi follows.*/
  nqis=0;
  for(;;){
    val=oc_pack_read(opb,6);
    _dec->state.qis[nqis]=(unsigned char)val;
    nqis++;
    if(nqis>=3)break;
    val=oc_pack_read1(opb);
    if(!val)break;
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes carry 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    val=oc_pack_read(opb,3);
    if(val!=0)return TH_EIMPL;
  }
  return 0;
}
477
478
/*Mark all fragments as coded and in OC_MODE_INTRA.
479
  This also builds up the coded fragment list (in coded order), and clears the
480
   uncoded fragment list.
481
  It does not update the coded macro block list nor the super block flags, as
482
   those are not used when decoding INTRA frames.*/
483
5.82k
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
484
5.82k
  const oc_sb_map   *sb_maps;
485
5.82k
  const oc_sb_flags *sb_flags;
486
5.82k
  oc_fragment       *frags;
487
5.82k
  ptrdiff_t         *coded_fragis;
488
5.82k
  ptrdiff_t          ncoded_fragis;
489
5.82k
  ptrdiff_t          prev_ncoded_fragis;
490
5.82k
  unsigned           nsbs;
491
5.82k
  unsigned           sbi;
492
5.82k
  int                pli;
493
5.82k
  coded_fragis=_dec->state.coded_fragis;
494
5.82k
  prev_ncoded_fragis=ncoded_fragis=0;
495
5.82k
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
496
5.82k
  sb_flags=_dec->state.sb_flags;
497
5.82k
  frags=_dec->state.frags;
498
5.82k
  sbi=nsbs=0;
499
23.3k
  for(pli=0;pli<3;pli++){
500
17.4k
    nsbs+=_dec->state.fplanes[pli].nsbs;
501
765k
    for(;sbi<nsbs;sbi++){
502
748k
      int quadi;
503
3.74M
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
504
2.87M
        int bi;
505
14.3M
        for(bi=0;bi<4;bi++){
506
11.4M
          ptrdiff_t fragi;
507
11.4M
          fragi=sb_maps[sbi][quadi][bi];
508
11.4M
          if(fragi>=0){
509
11.2M
            frags[fragi].coded=1;
510
11.2M
            frags[fragi].refi=OC_FRAME_SELF;
511
11.2M
            frags[fragi].mb_mode=OC_MODE_INTRA;
512
11.2M
            coded_fragis[ncoded_fragis++]=fragi;
513
11.2M
          }
514
11.4M
        }
515
2.87M
      }
516
748k
    }
517
17.4k
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
518
17.4k
    prev_ncoded_fragis=ncoded_fragis;
519
17.4k
  }
520
5.82k
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
521
5.82k
}
522
523
/*Decodes the bit flags indicating whether each super block is partially coded
524
   or not.
525
  Return: The number of partially coded super blocks.*/
526
402
static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
527
402
  oc_sb_flags *sb_flags;
528
402
  unsigned     nsbs;
529
402
  unsigned     sbi;
530
402
  unsigned     npartial;
531
402
  unsigned     run_count;
532
402
  long         val;
533
402
  int          flag;
534
402
  val=oc_pack_read1(&_dec->opb);
535
402
  flag=(int)val;
536
402
  sb_flags=_dec->state.sb_flags;
537
402
  nsbs=_dec->state.nsbs;
538
402
  sbi=npartial=0;
539
114k
  while(sbi<nsbs){
540
114k
    int full_run;
541
114k
    run_count=oc_sb_run_unpack(&_dec->opb);
542
114k
    full_run=run_count>=4129;
543
531k
    do{
544
531k
      sb_flags[sbi].coded_partially=flag;
545
531k
      sb_flags[sbi].coded_fully=0;
546
531k
      npartial+=flag;
547
531k
      sbi++;
548
531k
    }
549
531k
    while(--run_count>0&&sbi<nsbs);
550
114k
    if(full_run&&sbi<nsbs){
551
0
      val=oc_pack_read1(&_dec->opb);
552
0
      flag=(int)val;
553
0
    }
554
114k
    else flag=!flag;
555
114k
  }
556
  /*TODO: run_count should be 0 here.
557
    If it's not, we should issue a warning of some kind.*/
558
402
  return npartial;
559
402
}
560
561
/*Decodes the bit flags for whether or not each non-partially-coded super
562
   block is fully coded or not.
563
  This function should only be called if there is at least one
564
   non-partially-coded super block.
565
  Return: The number of partially coded super blocks.*/
566
402
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
567
402
  oc_sb_flags *sb_flags;
568
402
  unsigned     nsbs;
569
402
  unsigned     sbi;
570
402
  unsigned     run_count;
571
402
  long         val;
572
402
  int          flag;
573
402
  sb_flags=_dec->state.sb_flags;
574
402
  nsbs=_dec->state.nsbs;
575
  /*Skip partially coded super blocks.*/
576
2.41k
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
577
402
  val=oc_pack_read1(&_dec->opb);
578
402
  flag=(int)val;
579
31.8k
  do{
580
31.8k
    int full_run;
581
31.8k
    run_count=oc_sb_run_unpack(&_dec->opb);
582
31.8k
    full_run=run_count>=4129;
583
561k
    for(;sbi<nsbs;sbi++){
584
561k
      if(sb_flags[sbi].coded_partially)continue;
585
318k
      if(run_count--<=0)break;
586
287k
      sb_flags[sbi].coded_fully=flag;
587
287k
    }
588
31.8k
    if(full_run&&sbi<nsbs){
589
0
      val=oc_pack_read1(&_dec->opb);
590
0
      flag=(int)val;
591
0
    }
592
31.8k
    else flag=!flag;
593
31.8k
  }
594
31.8k
  while(sbi<nsbs);
595
  /*TODO: run_count should be 0 here.
596
    If it's not, we should issue a warning of some kind.*/
597
402
}
598
599
402
/*Reads the coded block flags of an INTER frame.
  The super block flags are decoded first, then every fragment is marked as
   coded or not coded and appended to the matching list: coded fragments fill
   coded_fragis from the front, uncoded fragments fill the same array from the
   back.
  For the luma plane, mb_modes records whether each macro block contains at
   least one coded block.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_end;
  ptrdiff_t          ncoded;
  ptrdiff_t          nuncoded;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  long               val;
  int                run_count;
  int                flag;
  int                pli;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The block runs start with an explicit flag.
    It is stored inverted because it gets toggled before the first run is
     applied.*/
  flag=0;
  if(npartial>0){
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  /*The uncoded list grows downwards from the end of the fragment list.*/
  uncoded_end=coded_fragis+_dec->state.nfrags;
  ncoded=0;
  nuncoded=0;
  run_count=0;
  sbi=0;
  sb_end=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t plane_start;
    plane_start=ncoded;
    sb_end+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<sb_end;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int quad_coded;
        int bi;
        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       coded;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi<0)continue;
          if(sb_flags[sbi].coded_fully)coded=1;
          else if(sb_flags[sbi].coded_partially){
            /*Start a new block run when the current one is exhausted.*/
            if(run_count<=0){
              run_count=oc_block_run_unpack(&_dec->opb);
              flag=!flag;
            }
            run_count--;
            coded=flag;
          }
          else coded=0;
          if(coded)coded_fragis[ncoded++]=fragi;
          else{
            nuncoded++;
            uncoded_end[-nuncoded]=fragi;
          }
          quad_coded|=coded;
          frags[fragi].coded=coded;
          frags[fragi].refi=OC_FRAME_NONE;
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(pli==0)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
672
673
674
/*Coding scheme:
675
   Codeword            Mode Index
676
   0                       0
677
   10                      1
678
   110                     2
679
   1110                    3
680
   11110                   4
681
   111110                  5
682
   1111110                 6
683
   1111111                 7*/
684
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
685
  4,
686
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
687
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688
   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
689
   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
690
    3,
691
     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
692
     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
693
};
694
695
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
696
  3,
697
   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
698
   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
699
};
700
701
/*Unpacks the list of macro block modes for INTER frames.*/
702
402
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
703
402
  signed char         *mb_modes;
704
402
  const unsigned char *alphabet;
705
402
  unsigned char        scheme0_alphabet[8];
706
402
  const ogg_int16_t   *mode_tree;
707
402
  size_t               nmbs;
708
402
  size_t               mbi;
709
402
  long                 val;
710
402
  int                  mode_scheme;
711
402
  val=oc_pack_read(&_dec->opb,3);
712
402
  mode_scheme=(int)val;
713
402
  if(mode_scheme==0){
714
272
    int mi;
715
    /*Just in case, initialize the modes to something.
716
      If the bitstream doesn't contain each index exactly once, it's likely
717
       corrupt and the rest of the packet is garbage anyway, but this way we
718
       won't crash, and we'll decode SOMETHING.*/
719
    /*LOOP VECTORIZES*/
720
2.44k
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
721
2.44k
    for(mi=0;mi<OC_NMODES;mi++){
722
2.17k
      val=oc_pack_read(&_dec->opb,3);
723
2.17k
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
724
2.17k
    }
725
272
    alphabet=scheme0_alphabet;
726
272
  }
727
130
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
728
402
  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
729
402
  mb_modes=_dec->state.mb_modes;
730
402
  nmbs=_dec->state.nmbs;
731
1.38M
  for(mbi=0;mbi<nmbs;mbi++){
732
1.38M
    if(mb_modes[mbi]>0){
733
      /*We have a coded luma block; decode a mode.*/
734
683k
      mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
735
683k
    }
736
    /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
737
       fact that OC_MODE_INTER_NOMV is already 0.*/
738
1.38M
  }
739
402
}
740
741
742
743
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
744
  5,
745
   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
746
   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
747
   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
748
   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
749
   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
750
   33,          36,          39,          42,
751
   45,          50,          55,          60,
752
   65,          74,          83,          92,
753
    1,-(1<<8|32+4),-(1<<8|32-4),
754
    1,-(1<<8|32+5),-(1<<8|32-5),
755
    1,-(1<<8|32+6),-(1<<8|32-6),
756
    1,-(1<<8|32+7),-(1<<8|32-7),
757
    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
758
    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
759
    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
760
    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
761
    3,
762
     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
763
     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
764
    3,
765
     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
766
     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
767
    3,
768
     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
769
     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
770
    3,
771
     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
772
     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
773
};
774
775
/*Lookup tree for the constant-length motion vector component code.
  oc_dec_mv_unpack_and_frag_modes_fill() selects this table when the MV coding
   flag bit is 1, and oc_mv_unpack() passes it to oc_huff_token_decode().
  The table is a single node with 64 entries, every one of which carries a
   length field of 6.
  As oc_mv_unpack() subtracts 32 from the decoded value, the entries are
   components biased by 32, listed as +k,-k pairs for k=0...31.
  NOTE(review): presumably the first entry (6) is the number of bits looked up
   and each -(6<<8|value) is a leaf; confirm against oc_huff_token_decode().*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
776
  6,
777
   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
778
   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
779
   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
780
   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
781
   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
782
   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
783
   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
784
   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
785
   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
786
   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
787
   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
788
   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
789
   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
790
   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
791
   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
792
   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
793
};
794
795
796
700k
/*Reads a single motion vector from the packet.
  Two tokens are decoded with the same tree: the first yields the x component
   and the second the y component.
  Each decoded value is biased by 32, which is removed before the components
   are combined.
  _opb:  The pack buffer to read from.
  _tree: The decoding tree used for both components.
  Return: The two components combined with OC_MV().*/
static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
  int comp[2];
  int ci;
  /*The loop keeps the bitstream order: x first, then y.*/
  for(ci=0;ci<2;ci++)comp[ci]=oc_huff_token_decode(_opb,_tree)-32;
  return OC_MV(comp[0],comp[1]);
}
803
804
/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
805
   block modes and motion vectors to the individual fragments.*/
806
402
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
807
402
  const oc_mb_map        *mb_maps;
808
402
  const signed char      *mb_modes;
809
402
  oc_set_chroma_mvs_func  set_chroma_mvs;
810
402
  const ogg_int16_t      *mv_comp_tree;
811
402
  oc_fragment            *frags;
812
402
  oc_mv                  *frag_mvs;
813
402
  const unsigned char    *map_idxs;
814
402
  int                     map_nidxs;
815
402
  oc_mv                   last_mv;
816
402
  oc_mv                   prior_mv;
817
402
  oc_mv                   cbmvs[4];
818
402
  size_t                  nmbs;
819
402
  size_t                  mbi;
820
402
  long                    val;
821
402
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
822
402
  val=oc_pack_read1(&_dec->opb);
823
402
  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
824
402
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
825
402
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
826
402
  prior_mv=last_mv=0;
827
402
  frags=_dec->state.frags;
828
402
  frag_mvs=_dec->state.frag_mvs;
829
402
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
830
402
  mb_modes=_dec->state.mb_modes;
831
402
  nmbs=_dec->state.nmbs;
832
1.38M
  for(mbi=0;mbi<nmbs;mbi++){
833
1.38M
    int mb_mode;
834
1.38M
    mb_mode=mb_modes[mbi];
835
1.38M
    if(mb_mode!=OC_MODE_INVALID){
836
1.35M
      oc_mv     mbmv;
837
1.35M
      ptrdiff_t fragi;
838
1.35M
      int       mapi;
839
1.35M
      int       mapii;
840
1.35M
      int       refi;
841
1.35M
      if(mb_mode==OC_MODE_INTER_MV_FOUR){
842
227k
        oc_mv lbmvs[4];
843
227k
        int   bi;
844
227k
        prior_mv=last_mv;
845
1.13M
        for(bi=0;bi<4;bi++){
846
908k
          fragi=mb_maps[mbi][0][bi];
847
908k
          if(frags[fragi].coded){
848
622k
            frags[fragi].refi=OC_FRAME_PREV;
849
622k
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
850
622k
            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
851
622k
            frag_mvs[fragi]=lbmvs[bi];
852
622k
          }
853
285k
          else lbmvs[bi]=0;
854
908k
        }
855
227k
        (*set_chroma_mvs)(cbmvs,lbmvs);
856
681k
        for(mapii=4;mapii<map_nidxs;mapii++){
857
454k
          mapi=map_idxs[mapii];
858
454k
          bi=mapi&3;
859
454k
          fragi=mb_maps[mbi][mapi>>2][bi];
860
454k
          if(frags[fragi].coded){
861
262k
            frags[fragi].refi=OC_FRAME_PREV;
862
262k
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
863
262k
            frag_mvs[fragi]=cbmvs[bi];
864
262k
          }
865
454k
        }
866
227k
      }
867
1.12M
      else{
868
1.12M
        switch(mb_mode){
869
72.9k
          case OC_MODE_INTER_MV:{
870
72.9k
            prior_mv=last_mv;
871
72.9k
            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
872
72.9k
          }break;
873
134k
          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
874
57.3k
          case OC_MODE_INTER_MV_LAST2:{
875
57.3k
            mbmv=prior_mv;
876
57.3k
            prior_mv=last_mv;
877
57.3k
            last_mv=mbmv;
878
57.3k
          }break;
879
4.98k
          case OC_MODE_GOLDEN_MV:{
880
4.98k
            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
881
4.98k
          }break;
882
855k
          default:mbmv=0;break;
883
1.12M
        }
884
        /*Fill in the MVs for the fragments.*/
885
1.12M
        refi=OC_FRAME_FOR_MODE(mb_mode);
886
1.12M
        mapii=0;
887
6.84M
        do{
888
6.84M
          mapi=map_idxs[mapii];
889
6.84M
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
890
6.84M
          if(frags[fragi].coded){
891
2.05M
            frags[fragi].refi=refi;
892
2.05M
            frags[fragi].mb_mode=mb_mode;
893
2.05M
            frag_mvs[fragi]=mbmv;
894
2.05M
          }
895
6.84M
        }
896
6.84M
        while(++mapii<map_nidxs);
897
1.12M
      }
898
1.35M
    }
899
1.38M
  }
900
402
}
901
902
6.23k
/*Unpacks the quantizer index selector (qii) of every coded fragment.
  Frames with a single qi value simply get a qii of 0 everywhere.
  Otherwise the qii's are coded with up to two passes of the same binary RLE
   scheme used for super-block coded bits, read with oc_sb_run_unpack().
  The first pass marks each fragment as having a qii of 0 or greater than 0.
  The second pass is only made when the frame has three qi values and at least
   one fragment was marked non-zero; it distinguishes a qii of 1 from 2.
  In both passes a run of 4129 or more is followed by an explicitly coded flag
   bit (if any fragments remain); after shorter runs the flag just toggles.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  const ptrdiff_t *fragis;
  oc_fragment     *frags;
  ptrdiff_t        total;
  ptrdiff_t        pos;
  int              nonzero;
  int              bit;
  int              run;
  int              long_run;
  total=_dec->state.ntotal_coded_fragis;
  if(total<=0)return;
  frags=_dec->state.frags;
  fragis=_dec->state.coded_fragis;
  /*With only one qi value there is nothing coded in the packet.*/
  if(_dec->state.nqis==1){
    for(pos=0;pos<total;pos++)frags[fragis[pos]].qii=0;
    return;
  }
  /*First pass: qii==0 versus qii>0.*/
  bit=(int)oc_pack_read1(&_dec->opb);
  nonzero=0;
  pos=0;
  while(pos<total){
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    /*At least one fragment is always consumed, whatever the run length.*/
    for(;;){
      frags[fragis[pos++]].qii=bit;
      nonzero+=bit;
      if(--run<=0||pos>=total)break;
    }
    if(long_run&&pos<total)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*The second pass is only needed with three qi's and a non-zero qii.*/
  if(_dec->state.nqis!=3||nonzero<=0)return;
  /*Skip the leading qii==0 fragments; nonzero>0 guarantees this stops.*/
  pos=0;
  while(frags[fragis[pos]].qii==0)pos++;
  bit=(int)oc_pack_read1(&_dec->opb);
  do{
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    /*Only fragments marked non-zero in the first pass count against the run.*/
    for(;pos<total;pos++){
      ptrdiff_t fragi;
      fragi=fragis[pos];
      if(frags[fragi].qii==0)continue;
      if(run--<=0)break;
      frags[fragi].qii+=bit;
    }
    if(long_run&&pos<total)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  while(pos<total);
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
982
983
984
985
/*Unpacks the DC coefficient tokens.
986
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
987
   the DC coefficient values now so that we can do DC prediction.
988
  _huff_idx:   The index of the Huffman table to use for each color plane.
989
  _ntoks_left: The number of tokens left to be decoded in each color plane for
990
                each coefficient.
991
               This is updated as EOB tokens and zero run tokens are decoded.
992
  Return: The length of any outstanding EOB run.*/
993
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
994
6.23k
 ptrdiff_t _ntoks_left[3][64]){
995
6.23k
  unsigned char   *dct_tokens;
996
6.23k
  oc_fragment     *frags;
997
6.23k
  const ptrdiff_t *coded_fragis;
998
6.23k
  ptrdiff_t        ncoded_fragis;
999
6.23k
  ptrdiff_t        fragii;
1000
6.23k
  ptrdiff_t        eobs;
1001
6.23k
  ptrdiff_t        ti;
1002
6.23k
  int              pli;
1003
6.23k
  dct_tokens=_dec->dct_tokens;
1004
6.23k
  frags=_dec->state.frags;
1005
6.23k
  coded_fragis=_dec->state.coded_fragis;
1006
6.23k
  ncoded_fragis=fragii=eobs=ti=0;
1007
24.9k
  for(pli=0;pli<3;pli++){
1008
18.6k
    ptrdiff_t run_counts[64];
1009
18.6k
    ptrdiff_t eob_count;
1010
18.6k
    ptrdiff_t eobi;
1011
18.6k
    int       rli;
1012
18.6k
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1013
18.6k
    memset(run_counts,0,sizeof(run_counts));
1014
18.6k
    _dec->eob_runs[pli][0]=eobs;
1015
18.6k
    _dec->ti0[pli][0]=ti;
1016
    /*Continue any previous EOB run, if there was one.*/
1017
18.6k
    eobi=eobs;
1018
18.6k
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1019
18.6k
    eob_count=eobi;
1020
18.6k
    eobs-=eobi;
1021
48.8k
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1022
13.5M
    while(fragii<ncoded_fragis){
1023
13.5M
      int token;
1024
13.5M
      int cw;
1025
13.5M
      int eb;
1026
13.5M
      int skip;
1027
13.5M
      token=oc_huff_token_decode(&_dec->opb,
1028
13.5M
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1029
13.5M
      dct_tokens[ti++]=(unsigned char)token;
1030
13.5M
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1031
12.0M
        eb=(int)oc_pack_read(&_dec->opb,
1032
12.0M
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1033
12.0M
        dct_tokens[ti++]=(unsigned char)eb;
1034
12.0M
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1035
12.0M
        eb<<=OC_DCT_TOKEN_EB_POS(token);
1036
12.0M
      }
1037
1.50M
      else eb=0;
1038
13.5M
      cw=OC_DCT_CODE_WORD[token]+eb;
1039
13.5M
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1040
13.5M
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1041
13.5M
      if(eobs){
1042
275k
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1043
275k
        eob_count+=eobi;
1044
275k
        eobs-=eobi;
1045
1.17M
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1046
275k
      }
1047
13.2M
      else{
1048
13.2M
        int coeff;
1049
13.2M
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1050
13.2M
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1051
13.2M
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1052
13.2M
        if(skip)coeff=0;
1053
13.2M
        run_counts[skip]++;
1054
13.2M
        frags[coded_fragis[fragii++]].dc=coeff;
1055
13.2M
      }
1056
13.5M
    }
1057
    /*Add the total EOB count to the longest run length.*/
1058
18.6k
    run_counts[63]+=eob_count;
1059
    /*And convert the run_counts array to a moment table.*/
1060
1.19M
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1061
    /*Finally, subtract off the number of coefficients that have been
1062
       accounted for by runs started in this coefficient.*/
1063
1.21M
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1064
18.6k
  }
1065
6.23k
  _dec->dct_tokens_count=ti;
1066
6.23k
  return eobs;
1067
6.23k
}
1068
1069
/*Unpacks the AC coefficient tokens.
1070
  This can completely discard coefficient values while unpacking, and so is
1071
   somewhat simpler than unpacking the DC coefficient tokens.
1072
  _huff_idx:   The index of the Huffman table to use for each color plane.
1073
  _ntoks_left: The number of tokens left to be decoded in each color plane for
1074
                each coefficient.
1075
               This is updated as EOB tokens and zero run tokens are decoded.
1076
  _eobs:       The length of any outstanding EOB run from previous
1077
                coefficients.
1078
  Return: The length of any outstanding EOB run.*/
1079
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1080
392k
 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1081
392k
  unsigned char *dct_tokens;
1082
392k
  ptrdiff_t      ti;
1083
392k
  int            pli;
1084
392k
  dct_tokens=_dec->dct_tokens;
1085
392k
  ti=_dec->dct_tokens_count;
1086
1.57M
  for(pli=0;pli<3;pli++){
1087
1.17M
    ptrdiff_t run_counts[64];
1088
1.17M
    ptrdiff_t eob_count;
1089
1.17M
    size_t    ntoks_left;
1090
1.17M
    size_t    ntoks;
1091
1.17M
    int       rli;
1092
1.17M
    _dec->eob_runs[pli][_zzi]=_eobs;
1093
1.17M
    _dec->ti0[pli][_zzi]=ti;
1094
1.17M
    ntoks_left=_ntoks_left[pli][_zzi];
1095
1.17M
    memset(run_counts,0,sizeof(run_counts));
1096
1.17M
    eob_count=0;
1097
1.17M
    ntoks=0;
1098
270M
    while(ntoks+_eobs<ntoks_left){
1099
269M
      int token;
1100
269M
      int cw;
1101
269M
      int eb;
1102
269M
      int skip;
1103
269M
      ntoks+=_eobs;
1104
269M
      eob_count+=_eobs;
1105
269M
      token=oc_huff_token_decode(&_dec->opb,
1106
269M
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1107
269M
      dct_tokens[ti++]=(unsigned char)token;
1108
269M
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1109
139k
        eb=(int)oc_pack_read(&_dec->opb,
1110
139k
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1111
139k
        dct_tokens[ti++]=(unsigned char)eb;
1112
139k
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1113
139k
        eb<<=OC_DCT_TOKEN_EB_POS(token);
1114
139k
      }
1115
269M
      else eb=0;
1116
269M
      cw=OC_DCT_CODE_WORD[token]+eb;
1117
269M
      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1118
269M
      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1119
269M
      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1120
269M
      if(_eobs==0){
1121
257M
        run_counts[skip]++;
1122
257M
        ntoks++;
1123
257M
      }
1124
269M
    }
1125
    /*Add the portion of the last EOB run actually used by this coefficient.*/
1126
1.17M
    eob_count+=ntoks_left-ntoks;
1127
    /*And remove it from the remaining EOB count.*/
1128
1.17M
    _eobs-=ntoks_left-ntoks;
1129
    /*Add the total EOB count to the longest run length.*/
1130
1.17M
    run_counts[63]+=eob_count;
1131
    /*And convert the run_counts array to a moment table.*/
1132
75.3M
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1133
    /*Finally, subtract off the number of coefficients that have been
1134
       accounted for by runs started in this coefficient.*/
1135
38.8M
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1136
1.17M
  }
1137
392k
  _dec->dct_tokens_count=ti;
1138
392k
  return _eobs;
1139
392k
}
1140
1141
/*Tokens describing the DCT coefficients that belong to each fragment are
1142
   stored in the bitstream grouped by coefficient, not by fragment.
1143
1144
  This means that we either decode all the tokens in order, building up a
1145
   separate coefficient list for each fragment as we go, and then go back and
1146
   do the iDCT on each fragment, or we have to create separate lists of tokens
1147
   for each coefficient, so that we can pull the next token required off the
1148
   head of the appropriate list when decoding a specific fragment.
1149
1150
  The former was VP3's choice, and it meant 2*w*h extra storage for all the
1151
   decoded coefficient values.
1152
1153
  We take the second option, which lets us store just one to three bytes per
1154
   token (generally far fewer than the number of coefficients, due to EOB
1155
   tokens and zero runs), and which requires us to only maintain a counter for
1156
   each of the 64 coefficients, instead of a counter for every fragment to
1157
   determine where the next token goes.
1158
1159
  We actually use 3 counters per coefficient, one for each color plane, so we
1160
   can decode all color planes simultaneously.
1161
  This lets color conversion, etc., be done as soon as a full MCU (one or
1162
   two super block rows) is decoded, while the image data is still in cache.*/
1163
1164
6.23k
/*Unpacks all of the DCT tokens of a frame: first the DC tokens, then the AC
   tokens one zig-zag index at a time.
  Two 4-bit Huffman table indices are read before the DC tokens and two more
   before the AC tokens.
  The AC indices are advanced by 16 for each of the four groups of zig-zag
   indices whose upper bounds are listed in OC_HUFF_LIST_MAX.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t  ntoks_left[3][64];
  ptrdiff_t  eobs;
  int        huff_idxs[2];
  int        pli;
  int        zzi;
  int        hgi;
  int        i;
  /*Initially every coded fragment owes one token at every coefficient.*/
  for(pli=0;pli<3;pli++){
    for(zzi=0;zzi<64;zzi++){
      ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
    }
  }
  /*Table indices for the DC tokens.*/
  for(i=0;i<2;i++)huff_idxs[i]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*Base table indices for the AC tokens.*/
  for(i=0;i<2;i++)huff_idxs[i]=(int)oc_pack_read(&_dec->opb,4);
  zzi=1;
  for(hgi=1;hgi<5;hgi++){
    int zzi_end;
    zzi_end=OC_HUFF_LIST_MAX[hgi];
    /*Each group of coefficients uses the next set of 16 tables.*/
    huff_idxs[0]+=16;
    huff_idxs[1]+=16;
    while(zzi<zzi_end){
      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
      zzi++;
    }
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1202
1203
1204
6.23k
/*Prepares (or releases) the buffers used for post-processing, according to
   the current _dec->pp_level.
  Return: 0 if the post-processing buffers are set up and ready for use, or 1
           if post-processing cannot be done for this frame (it is disabled,
           only DC qi tracking was requested, tracking cannot start on a
           non-INTRA frame, or an allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  size_t luma_sz;
  size_t chroma_sz;
  int    chroma_w;
  int    chroma_h;
  int    pp_chroma;
  /*musl libc malloc()/realloc() calls might use floating point, so make sure
     we've cleared the MMX state for them.*/
  oc_restore_fpu(&_dec->state);
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->dc_qis=NULL;
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    const ptrdiff_t *fragis;
    unsigned char   *dc_qis;
    ptrdiff_t        nfragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Update the DC quantization index of each coded block.*/
    dc_qis=_dec->dc_qis;
    fragis=_dec->state.coded_fragis;
    nfragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<nfragis;fragii++)dc_qis[fragis[fragii]]=qi0;
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*Every fragment of the frame starts out with the frame's first qi.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  /*Plane sizes, needed both for allocation and for laying out the planes.*/
  luma_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
  chroma_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
  chroma_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
  chroma_sz=chroma_w*(size_t)chroma_h;
  pp_chroma=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  if(_dec->variances==NULL){
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     (luma_sz+(chroma_sz<<1))*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      _ogg_free(_dec->pp_frame_data);
      _ogg_free(_dec->variances);
      _dec->pp_frame_data=NULL;
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*Update the PP buffer pointers if necessary.
    pp_frame_state is 1 when only the luma plane is set up and 2 when all
     three planes are.*/
  if(_dec->pp_frame_state!=1+pp_chroma){
    _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
    _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
    if(!pp_chroma){
      /*If chroma processing is disabled, just use the PP luma plane.
        It is addressed bottom-up: the stride is negative and the data pointer
         refers to the last row in memory.*/
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      /*Otherwise, set up pointers to all three PP planes.*/
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=chroma_w;
      _dec->pp_frame_buf[1].height=chroma_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+luma_sz;
      _dec->pp_frame_buf[2].width=chroma_w;
      _dec->pp_frame_buf[2].height=chroma_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+chroma_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=1+pp_chroma;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(!pp_chroma){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1326
1327
1328
/*Initialize the main decoding pipeline.*/
1329
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1330
6.23k
 oc_dec_pipeline_state *_pipe){
1331
6.23k
  const ptrdiff_t *coded_fragis;
1332
6.23k
  const ptrdiff_t *uncoded_fragis;
1333
6.23k
  int              flimit;
1334
6.23k
  int              pli;
1335
6.23k
  int              qii;
1336
6.23k
  int              qti;
1337
6.23k
  int              zzi;
1338
  /*If chroma is sub-sampled in the vertical direction, we have to decode two
1339
     super block rows of Y' for each super block row of Cb and Cr.*/
1340
6.23k
  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1341
  /*Initialize the token and extra bits indices for each plane and
1342
     coefficient.*/
1343
6.23k
  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1344
  /*Also copy over the initial the EOB run counts.*/
1345
6.23k
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1346
  /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1347
6.23k
  coded_fragis=_dec->state.coded_fragis;
1348
6.23k
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
1349
24.9k
  for(pli=0;pli<3;pli++){
1350
18.6k
    ptrdiff_t ncoded_fragis;
1351
18.6k
    _pipe->coded_fragis[pli]=coded_fragis;
1352
18.6k
    _pipe->uncoded_fragis[pli]=uncoded_fragis;
1353
18.6k
    ncoded_fragis=_dec->state.ncoded_fragis[pli];
1354
18.6k
    coded_fragis+=ncoded_fragis;
1355
18.6k
    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1356
18.6k
  }
1357
  /*Set up condensed quantizer tables.*/
1358
24.9k
  for(pli=0;pli<3;pli++){
1359
37.5k
    for(qii=0;qii<_dec->state.nqis;qii++){
1360
56.5k
      for(qti=0;qti<2;qti++){
1361
37.6k
        _pipe->dequant[pli][qii][qti]=
1362
37.6k
         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1363
37.6k
      }
1364
18.8k
    }
1365
18.6k
  }
1366
  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1367
6.23k
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1368
  /*Initialize the bounding value array for the loop filter.*/
1369
6.23k
  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1370
6.23k
  _pipe->loop_filter=flimit!=0;
1371
6.23k
  if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1372
  /*Initialize any buffers needed for post-processing.
1373
    We also save the current post-processing level, to guard against the user
1374
     changing it from a callback.*/
1375
6.23k
  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1376
  /*If we don't have enough information to post-process, disable it, regardless
1377
     of the user-requested level.*/
1378
6.23k
  else{
1379
6.23k
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1380
6.23k
    memcpy(_dec->pp_frame_buf,
1381
6.23k
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1382
6.23k
     sizeof(_dec->pp_frame_buf[0])*3);
1383
6.23k
  }
1384
  /*Clear down the DCT coefficient buffer for the first block.*/
1385
405k
  for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1386
6.23k
}
1387
1388
/*Undo the DC prediction in a single plane of an MCU (one or two super block
1389
   rows).
1390
  As a side effect, the number of coded and uncoded fragments in this plane of
1391
   the MCU is also computed.*/
1392
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1393
84.9k
 oc_dec_pipeline_state *_pipe,int _pli){
1394
84.9k
  const oc_fragment_plane *fplane;
1395
84.9k
  oc_fragment             *frags;
1396
84.9k
  int                     *pred_last;
1397
84.9k
  ptrdiff_t                ncoded_fragis;
1398
84.9k
  ptrdiff_t                fragi;
1399
84.9k
  int                      fragx;
1400
84.9k
  int                      fragy;
1401
84.9k
  int                      fragy0;
1402
84.9k
  int                      fragy_end;
1403
84.9k
  int                      nhfrags;
1404
  /*Compute the first and last fragment row of the current MCU for this
1405
     plane.*/
1406
84.9k
  fplane=_dec->state.fplanes+_pli;
1407
84.9k
  fragy0=_pipe->fragy0[_pli];
1408
84.9k
  fragy_end=_pipe->fragy_end[_pli];
1409
84.9k
  nhfrags=fplane->nhfrags;
1410
84.9k
  pred_last=_pipe->pred_last[_pli];
1411
84.9k
  frags=_dec->state.frags;
1412
84.9k
  ncoded_fragis=0;
1413
84.9k
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1414
508k
  for(fragy=fragy0;fragy<fragy_end;fragy++){
1415
423k
    if(fragy==0){
1416
      /*For the first row, all of the cases reduce to just using the previous
1417
         predictor for the same reference frame.*/
1418
614k
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1419
595k
        if(frags[fragi].coded){
1420
506k
          int refi;
1421
506k
          refi=frags[fragi].refi;
1422
506k
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1423
506k
          ncoded_fragis++;
1424
506k
        }
1425
595k
      }
1426
18.6k
    }
1427
404k
    else{
1428
404k
      oc_fragment *u_frags;
1429
404k
      int          l_ref;
1430
404k
      int          ul_ref;
1431
404k
      int          u_ref;
1432
404k
      u_frags=frags-nhfrags;
1433
404k
      l_ref=-1;
1434
404k
      ul_ref=-1;
1435
404k
      u_ref=u_frags[fragi].refi;
1436
19.2M
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1437
18.8M
        int ur_ref;
1438
18.8M
        if(fragx+1>=nhfrags)ur_ref=-1;
1439
18.4M
        else ur_ref=u_frags[fragi+1].refi;
1440
18.8M
        if(frags[fragi].coded){
1441
13.6M
          int pred;
1442
13.6M
          int refi;
1443
13.6M
          refi=frags[fragi].refi;
1444
          /*We break out a separate case based on which of our neighbors use
1445
             the same reference frames.
1446
            This is somewhat faster than trying to make a generic case which
1447
             handles all of them, since it reduces lots of poorly predicted
1448
             jumps to one switch statement, and also lets a number of the
1449
             multiplications be optimized out by strength reduction.*/
1450
13.6M
          switch((l_ref==refi)|(ul_ref==refi)<<1|
1451
13.6M
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
1452
166k
            default:pred=pred_last[refi];break;
1453
132k
            case  1:
1454
193k
            case  3:pred=frags[fragi-1].dc;break;
1455
50.3k
            case  2:pred=u_frags[fragi-1].dc;break;
1456
42.0k
            case  4:
1457
69.9k
            case  6:
1458
545k
            case 12:pred=u_frags[fragi].dc;break;
1459
33.3k
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1460
59.8k
            case  8:pred=u_frags[fragi+1].dc;break;
1461
52.7k
            case  9:
1462
108k
            case 11:
1463
192k
            case 13:{
1464
              /*The TI compiler mis-compiles this line.*/
1465
192k
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1466
192k
            }break;
1467
378k
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1468
85.4k
            case 14:{
1469
85.4k
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1470
85.4k
               +10*u_frags[fragi].dc)/16;
1471
85.4k
            }break;
1472
526k
            case  7:
1473
11.9M
            case 15:{
1474
11.9M
              int p0;
1475
11.9M
              int p1;
1476
11.9M
              int p2;
1477
11.9M
              p0=frags[fragi-1].dc;
1478
11.9M
              p1=u_frags[fragi-1].dc;
1479
11.9M
              p2=u_frags[fragi].dc;
1480
11.9M
              pred=(29*(p0+p2)-26*p1)/32;
1481
11.9M
              if(abs(pred-p2)>128)pred=p2;
1482
11.2M
              else if(abs(pred-p0)>128)pred=p0;
1483
10.6M
              else if(abs(pred-p1)>128)pred=p1;
1484
11.9M
            }break;
1485
13.6M
          }
1486
13.6M
          pred_last[refi]=frags[fragi].dc+=pred;
1487
13.6M
          ncoded_fragis++;
1488
13.6M
          l_ref=refi;
1489
13.6M
        }
1490
5.17M
        else l_ref=-1;
1491
18.8M
        ul_ref=u_ref;
1492
18.8M
        u_ref=ur_ref;
1493
18.8M
      }
1494
404k
    }
1495
423k
  }
1496
84.9k
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1497
  /*Also save the number of uncoded fragments so we know how many to copy.*/
1498
84.9k
  _pipe->nuncoded_fragis[_pli]=
1499
84.9k
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1500
84.9k
}
1501
1502
/*Reconstructs all coded fragments in a single MCU (one or two super block
1503
   rows).
1504
  This requires that each coded fragment have a proper macro block mode and
1505
   motion vector (if not in INTRA mode), and have its DC value decoded, with
1506
   the DC prediction process reversed, and the number of coded and uncoded
1507
   fragments in this plane of the MCU be counted.
1508
  The token lists for each color plane and coefficient should also be filled
1509
   in, along with initial token offsets, extra bits offsets, and EOB run
1510
   counts.*/
1511
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1512
84.9k
 oc_dec_pipeline_state *_pipe,int _pli){
1513
84.9k
  unsigned char       *dct_tokens;
1514
84.9k
  const unsigned char *dct_fzig_zag;
1515
84.9k
  ogg_uint16_t         dc_quant[2];
1516
84.9k
  const oc_fragment   *frags;
1517
84.9k
  const ptrdiff_t     *coded_fragis;
1518
84.9k
  ptrdiff_t            ncoded_fragis;
1519
84.9k
  ptrdiff_t            fragii;
1520
84.9k
  ptrdiff_t           *ti;
1521
84.9k
  ptrdiff_t           *eob_runs;
1522
84.9k
  int                  qti;
1523
84.9k
  dct_tokens=_dec->dct_tokens;
1524
84.9k
  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1525
84.9k
  frags=_dec->state.frags;
1526
84.9k
  coded_fragis=_pipe->coded_fragis[_pli];
1527
84.9k
  ncoded_fragis=_pipe->ncoded_fragis[_pli];
1528
84.9k
  ti=_pipe->ti[_pli];
1529
84.9k
  eob_runs=_pipe->eob_runs[_pli];
1530
254k
  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1531
14.2M
  for(fragii=0;fragii<ncoded_fragis;fragii++){
1532
14.1M
    const ogg_uint16_t *ac_quant;
1533
14.1M
    ptrdiff_t           fragi;
1534
14.1M
    int                 last_zzi;
1535
14.1M
    int                 zzi;
1536
14.1M
    fragi=coded_fragis[fragii];
1537
14.1M
    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1538
14.1M
    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1539
    /*Decode the AC coefficients.*/
1540
297M
    for(zzi=0;zzi<64;){
1541
297M
      int token;
1542
297M
      last_zzi=zzi;
1543
297M
      if(eob_runs[zzi]){
1544
14.1M
        eob_runs[zzi]--;
1545
14.1M
        break;
1546
14.1M
      }
1547
282M
      else{
1548
282M
        ptrdiff_t eob;
1549
282M
        int       cw;
1550
282M
        int       rlen;
1551
282M
        int       coeff;
1552
282M
        int       lti;
1553
282M
        lti=ti[zzi];
1554
282M
        token=dct_tokens[lti++];
1555
282M
        cw=OC_DCT_CODE_WORD[token];
1556
        /*These parts could be done branchless, but the branches are fairly
1557
           predictable and the C code translates into more than a few
1558
           instructions, so it's worth it to avoid them.*/
1559
282M
        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1560
12.1M
          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1561
12.1M
        }
1562
282M
        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1563
282M
        if(token==OC_DCT_TOKEN_FAT_EOB){
1564
2.34k
          eob+=dct_tokens[lti++]<<8;
1565
2.34k
          if(eob==0)eob=OC_DCT_EOB_FINISH;
1566
2.34k
        }
1567
282M
        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1568
282M
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1569
282M
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1570
282M
        eob_runs[zzi]=eob;
1571
282M
        ti[zzi]=lti;
1572
282M
        zzi+=rlen;
1573
282M
        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1574
282M
         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1575
282M
        zzi+=!eob;
1576
282M
      }
1577
297M
    }
1578
    /*TODO: zzi should be exactly 64 here.
1579
      If it's not, we should report some kind of warning.*/
1580
14.1M
    zzi=OC_MINI(zzi,64);
1581
14.1M
    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1582
    /*last_zzi is always initialized.
1583
      If your compiler thinks otherwise, it is dumb.*/
1584
14.1M
    oc_state_frag_recon(&_dec->state,fragi,_pli,
1585
14.1M
     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1586
14.1M
  }
1587
84.9k
  _pipe->coded_fragis[_pli]+=ncoded_fragis;
1588
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
1589
  /*TODO: We make the decision here to always copy the uncoded blocks into it
1590
     from the reference frame.
1591
    We could also copy the coded blocks back over the reference frame, if we
1592
     wait for an additional MCU to be decoded, which might be faster if only a
1593
     small number of blocks are coded.
1594
    However, this introduces more latency, creating a larger cache footprint.
1595
    It's unknown which decision is better, but this one results in simpler
1596
     code, and the hard case (high bitrate, high resolution) is handled
1597
     correctly.*/
1598
  /*Copy the uncoded blocks from the previous reference frame.*/
1599
84.9k
  if(_pipe->nuncoded_fragis[_pli]>0){
1600
13.2k
    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1601
13.2k
    oc_frag_copy_list(&_dec->state,
1602
13.2k
     _dec->state.ref_frame_data[OC_FRAME_SELF],
1603
13.2k
     _dec->state.ref_frame_data[OC_FRAME_PREV],
1604
13.2k
     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1605
13.2k
     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1606
13.2k
  }
1607
84.9k
}
1608
1609
/*Filter a horizontal block edge.
  For each of 8 columns, 10 vertically adjacent source pixels are read and 8
   output pixels are written.
  _dst:         The first output pixel (top of the first column).
  _dst_ystride: The offset between output rows.
  _src:         The first input pixel (top of the first column).
  _src_ystride: The offset between input rows.
  _qstep:       The largest step across the edge (exclusive) that is still
                 filtered.
  _flimit:      The largest activity on either side of the edge (exclusive)
                 that is still filtered.
  _variance0:   Accumulates the (capped at 255) activity of the first 5
                 pixels of every column.
  _variance1:   Accumulates the (capped at 255) activity of the last 5
                 pixels of every column.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  act0;
    int                  act1;
    int                  i;
    /*Gather the whole column before anything is written.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    /*Sum of absolute differences on each side of the edge.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    out=_dst+col;
    if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      /*Both sides are flat and the step is small: low-pass the 8 pixels.*/
      *out=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      out+=_dst_ystride;
      *out=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      out+=_dst_ystride;
      for(i=0;i<4;i++){
        *out=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
        out+=_dst_ystride;
      }
      *out=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      out+=_dst_ystride;
      *out=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    else{
      /*Otherwise copy the middle 8 pixels through unchanged.*/
      for(i=1;i<=8;i++){
        *out=(unsigned char)p[i];
        out+=_dst_ystride;
      }
    }
  }
}
1662
1663
/*Filter a vertical block edge.
  The filter runs in place: for each of 8 rows, 10 horizontally adjacent
   pixels starting one pixel to the left of _dst are read, and, if the filter
   condition holds, the 8 pixels starting at _dst are overwritten.
  _dst:         The first pixel that may be modified (top row).
  _dst_ystride: The offset between rows.
  _qstep:       The largest step across the edge (exclusive) that is still
                 filtered.
  _flimit:      The largest activity on either side of the edge (exclusive)
                 that is still filtered.
  _variances:   _variances[0] and _variances[1] accumulate the (capped at
                 255) activity of the first and last 5 pixels of every row,
                 respectively.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int i;
    /*Gather the whole row before anything is written.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      /*Both sides are flat and the step is small: low-pass the 8 pixels.*/
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1699
1700
/*Runs the post-processing deblocking filter over a range of fragment rows of
   one plane, reading from _src and writing to _dst.
  The per-fragment activity sums produced by the edge filters are accumulated
   in _dec->variances.
  _dec:       The decoder context.
  _dst:       The array of output image planes.
  _src:       The array of input image planes.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *var;
  unsigned char       *dc_qi;
  unsigned char       *out;
  const unsigned char *in;
  ptrdiff_t            froffset;
  int                  out_ystride;
  int                  in_ystride;
  int                  nhfrags;
  int                  width;
  int                  has_prev;
  int                  has_next;
  int                  nvar_rows;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  var=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  /*Whether there are fragment rows above/below the requested range.*/
  has_prev=_fragy0>0;
  has_next=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.
    When this is not the first call, the clearing starts one row further
     down.*/
  nvar_rows=_fragy_end+has_next-_fragy0-has_prev;
  memset(var+(has_prev?nhfrags:0),0,
   nvar_rows*(nhfrags*sizeof(var[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(has_prev<<2);
  out_ystride=_dst->stride;
  in_ystride=_src->stride;
  out=_dst->data+y*(ptrdiff_t)out_ystride;
  in=_src->data+y*(ptrdiff_t)in_ystride;
  width=_dst->width;
  /*The top 4 lines of the plane are copied through unfiltered.*/
  for(;y<4;y++){
    memcpy(out,in,width*sizeof(out[0]));
    out+=out_ystride;
    in+=in_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=(_fragy_end-!has_next)<<3;
  for(;y<y_end;y+=8){
    /*The first column is always processed; it has no vertical edge to its
       left, so only the horizontal edge is filtered there.*/
    x=0;
    do{
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(out+x,out_ystride,in+x-in_ystride,in_ystride,
       qstep,flimit,var,var+nhfrags);
      if(x>0){
        oc_filter_vedge(out+x-(out_ystride*4)-4,out_ystride,
         qstep,flimit,var-1);
      }
      var++;
      dc_qi++;
      x+=8;
    }
    while(x<width);
    out+=out_ystride*8;
    in+=in_ystride*8;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!has_next){
    int height;
    height=_dst->height;
    for(;y<height;y++){
      memcpy(out,in,width*sizeof(out[0]));
      out+=out_ystride;
      in+=in_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      dc_qi++;
      flimit=(qstep*3)>>2;
      oc_filter_vedge(out+x-(out_ystride*8)-4,out_ystride,
       qstep,flimit,var);
      var++;
    }
  }
}
1785
1786
/*Applies one pass of the deringing filter to an 8x8 block, in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors, where the weight of a neighbor shrinks as the difference across
   the corresponding pixel boundary grows.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between rows.
  _b:         Boundary flags.
              If bit 0, 1, 2 or 3 is set, pixels to the left of, to the right
               of, above or below the block (respectively) are not read; the
               block's own edge pixels are used in their place.
  _dc_scale:  Added to the base weight; three times this value also bounds
               the maximum weight.
  _sharp_mod: The weight used across boundaries whose computed weight falls
               below -64.
  _strong:    Selects (0 or 1) the maximum weight and the shift applied to
               the pixel differences.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *prev;
  const unsigned char *cur;
  const unsigned char *next;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  up_step;
  int                  left_step;
  int                  right_step;
  int                  y;
  int                  x;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  shift=OC_MOD_SHIFT[_strong];
  /*How far to step to reach the neighbor outside the block on each side
     (0 when that side is flagged as a boundary).*/
  up_step=(_b&4)?0:_ystride;
  left_step=(_b&1)?0:1;
  right_step=(_b&2)?0:1;
  /*Weights across the 9 horizontal boundaries (between vertically adjacent
     pixels), stored row-major.*/
  cur=_idata;
  prev=cur-up_step;
  for(y=0;y<9;y++){
    for(x=0;x<8;x++){
      int mod;
      mod=32+_dc_scale-(abs(cur[x]-prev[x])<<shift);
      vmod[(y<<3)+x]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    prev=cur;
    /*Do not step below the block when the bottom side is a boundary.*/
    if(!(_b&8)||y<7)cur+=_ystride;
  }
  /*Weights across the 9 vertical boundaries (between horizontally adjacent
     pixels), stored column-major.*/
  next=_idata;
  prev=_idata-left_step;
  for(x=0;x<9;x++){
    cur=next;
    for(y=0;y<8;y++){
      int mod;
      mod=32+_dc_scale-(abs(*cur-*prev)<<shift);
      hmod[(x<<3)+y]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      prev+=_ystride;
      cur+=_ystride;
    }
    prev=next;
    /*Do not step right of the block when the right side is a boundary.*/
    if(!(_b&2)||x<7)next++;
  }
  /*Apply the weights.
    The block is filtered in place, so pixels to the left of and above the
     current one have already been replaced when they are read.*/
  out=_idata;
  cur=out;
  prev=cur-up_step;
  next=cur+_ystride;
  for(y=0;y<8;y++){
    int a;
    int b;
    int wl;
    int wu;
    int wd;
    int wr;
    /*The leftmost pixel, whose left neighbor may lie outside the block.*/
    wl=hmod[y];
    wu=vmod[y<<3];
    wd=vmod[(y+1)<<3];
    wr=hmod[(1<<3)+y];
    a=128-wl-wu-wd-wr;
    b=64+wl*cur[-left_step]+wu*prev[0]+wd*next[0]+wr*cur[1];
    out[0]=OC_CLAMP255((a*cur[0]+b)>>7);
    /*The interior pixels of the row.*/
    for(x=1;x<7;x++){
      wl=hmod[(x<<3)+y];
      wu=vmod[(y<<3)+x];
      wd=vmod[((y+1)<<3)+x];
      wr=hmod[((x+1)<<3)+y];
      a=128-wl-wu-wd-wr;
      b=64+wl*cur[x-1]+wu*prev[x]+wd*next[x]+wr*cur[x+1];
      out[x]=OC_CLAMP255((a*cur[x]+b)>>7);
    }
    /*The rightmost pixel, whose right neighbor may lie outside the block.*/
    wl=hmod[(7<<3)+y];
    wu=vmod[(y<<3)+7];
    wd=vmod[((y+1)<<3)+7];
    wr=hmod[(8<<3)+y];
    a=128-wl-wu-wd-wr;
    b=64+wl*cur[6]+wu*prev[7]+wd*next[7]+wr*cur[7+right_step];
    out[7]=OC_CLAMP255((a*cur[7]+b)>>7);
    out+=_ystride;
    prev=cur;
    cur=next;
    /*Do not step below the block when the bottom side is a boundary.*/
    if(!(_b&8)||y<6)next+=_ystride;
  }
}
1886
1887
0
/*Thresholds, all multiples of the first one, that a fragment's accumulated
   variance is compared against when deciding whether, and how strongly, to
   dering it.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (OC_DERING_THRESH1*4)
#define OC_DERING_THRESH3 (OC_DERING_THRESH1*5)
#define OC_DERING_THRESH4 (OC_DERING_THRESH1*10)
1891
1892
/*Runs the post-processing deringing filter, in place, over a range of
   fragment rows of one plane.
  The number of filter passes and their strength are chosen per fragment from
   the variance recorded for it (and, for the strongest setting, for its
   neighbors) in _dec->variances.
  _dec:       The decoder context.
  _img:       The array of image planes to filter.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *iplane;
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *var;
  unsigned char     *row;
  ptrdiff_t          froffset;
  int                ystride;
  int                nhfrags;
  int                strong_thresh;
  int                allow_strong;
  int                y_end;
  int                width;
  int                height;
  int                y;
  int                x;
  iplane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  var=_dec->variances+froffset;
  frag=_dec->state.frags+froffset;
  /*Luma and chroma use different post-processing levels and variance
     thresholds to enable the strongest filtering.*/
  if(_pli){
    allow_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGC;
    strong_thresh=OC_DERING_THRESH4;
  }
  else{
    allow_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGY;
    strong_thresh=OC_DERING_THRESH3;
  }
  y=_fragy0<<3;
  ystride=iplane->stride;
  row=iplane->data+y*(ptrdiff_t)ystride;
  y_end=_fragy_end<<3;
  width=iplane->width;
  height=iplane->height;
  for(;y<y_end;y+=8){
    for(x=0;x<width;x+=8){
      int edges;
      int qi;
      int v;
      int passes;
      int strength;
      qi=_dec->state.qis[frag->qii];
      v=*var;
      /*Bits 0-3: the block touches the left, right, top or bottom side of
         the plane.*/
      edges=(x<=0)|((x+8>=width)<<1)|((y<=0)<<2)|((y+8>=height)<<3);
      passes=0;
      strength=0;
      if(allow_strong&&v>strong_thresh){
        strength=1;
        passes=1;
        /*Chroma blocks, and luma blocks with a very high variance neighbor,
           get two more passes.
          Neighbors outside the plane are never examined.*/
        if(_pli
         ||(!(edges&1)&&var[-1]>OC_DERING_THRESH4)
         ||(!(edges&2)&&var[1]>OC_DERING_THRESH4)
         ||(!(edges&4)&&var[-nhfrags]>OC_DERING_THRESH4)
         ||(!(edges&8)&&var[nhfrags]>OC_DERING_THRESH4)){
          passes=3;
        }
      }
      else if(v>OC_DERING_THRESH2){
        strength=1;
        passes=1;
      }
      else if(v>OC_DERING_THRESH1){
        passes=1;
      }
      for(;passes>0;passes--){
        oc_dering_block(row+x,ystride,edges,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],strength);
      }
      frag++;
      var++;
    }
    row+=ystride*8;
  }
}
1958
1959
1960
1961
81
th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1962
81
  oc_dec_ctx *dec;
1963
81
  if(_info==NULL||_setup==NULL)return NULL;
1964
81
  dec=oc_aligned_malloc(sizeof(*dec),16);
1965
81
  if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1966
0
    oc_aligned_free(dec);
1967
0
    return NULL;
1968
0
  }
1969
81
  dec->state.curframe_num=0;
1970
81
  return dec;
1971
81
}
1972
1973
759
/*Clears and releases a decoder context.
  _dec: The decoder to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1979
1980
/*Decoder control function.
  _dec:    The decoder context.
  _req:    The control code to process.
  _buf:    The parameters for this control code.
  _buf_sz: The size of the parameter buffer, which must exactly match the
            size of the type the control code expects.
  Return: 0 on success,
          TH_EFAULT if _dec or _buf is NULL,
          TH_EINVAL if _buf_sz is wrong or the supplied value is out of
           range, or
          TH_EIMPL if the control code is not handled.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    _dec->state.granpos=granpos;
    /*The high bits of the granule position hold the key frame number...*/
    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
     -_dec->state.granpos_bias;
    /*...and the low keyframe_granule_shift bits hold the offset from it.
      The mask is built in 64 bits: shifting a plain int 1 left by 31 (and
       then subtracting 1 from the result) would overflow a 32-bit int.*/
    _dec->state.curframe_num=_dec->state.keyframe_num
     +(granpos&(((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1));
    return 0;
  }break;
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2049
2050
/*We're decoding an INTER frame, but have no initialized reference
2051
   buffers (i.e., decoding did not start on a key frame).
2052
  We initialize them to a solid gray here.*/
2053
25
static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2054
25
  th_info   *info;
2055
25
  size_t     yplane_sz;
2056
25
  size_t     cplane_sz;
2057
25
  ptrdiff_t  yoffset;
2058
25
  int        yhstride;
2059
25
  int        yheight;
2060
25
  int        chstride;
2061
25
  int        cheight;
2062
25
  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2063
25
  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2064
25
  _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2065
25
  _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2066
25
   _dec->state.ref_frame_data[OC_FRAME_PREV]=
2067
25
   _dec->state.ref_frame_data[OC_FRAME_SELF]=
2068
25
   _dec->state.ref_frame_bufs[0][0].data;
2069
25
  memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2070
25
   sizeof(_dec->pp_frame_buf[0])*3);
2071
25
  info=&_dec->state.info;
2072
25
  yhstride=abs(_dec->state.ref_ystride[0]);
2073
25
  yheight=info->frame_height+2*OC_UMV_PADDING;
2074
25
  chstride=abs(_dec->state.ref_ystride[1]);
2075
25
  cheight=yheight>>!(info->pixel_fmt&2);
2076
25
  yplane_sz=yhstride*(size_t)yheight+16;
2077
25
  cplane_sz=chstride*(size_t)cheight;
2078
25
  yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2079
25
  memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2080
25
}
2081
2082
#if defined(HAVE_CAIRO)
2083
static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2084
 int _telemetry){
2085
  /*Stuff the plane into cairo.*/
2086
  cairo_surface_t *cs;
2087
  unsigned char   *data;
2088
  unsigned char   *y_row;
2089
  unsigned char   *u_row;
2090
  unsigned char   *v_row;
2091
  unsigned char   *rgb_row;
2092
  int              cstride;
2093
  int              w;
2094
  int              h;
2095
  int              x;
2096
  int              y;
2097
  int              hdec;
2098
  int              vdec;
2099
  w=_ycbcr[0].width;
2100
  h=_ycbcr[0].height;
2101
  hdec=!(_dec->state.info.pixel_fmt&1);
2102
  vdec=!(_dec->state.info.pixel_fmt&2);
2103
  /*Lazy data buffer init.
2104
    We could try to reuse the post-processing buffer, which would save
2105
     memory, but complicate the allocation logic there.
2106
    I don't think anyone cares about memory usage when using telemetry; it is
2107
     not meant for embedded devices.*/
2108
  if(_dec->telemetry_frame_data==NULL){
2109
    _dec->telemetry_frame_data=_ogg_malloc(
2110
     (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2111
    if(_dec->telemetry_frame_data==NULL)return;
2112
  }
2113
  cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2114
  /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2115
  data=cairo_image_surface_get_data(cs);
2116
  if(data==NULL){
2117
    cairo_surface_destroy(cs);
2118
    return;
2119
  }
2120
  cstride=cairo_image_surface_get_stride(cs);
2121
  y_row=_ycbcr[0].data;
2122
  u_row=_ycbcr[1].data;
2123
  v_row=_ycbcr[2].data;
2124
  rgb_row=data;
2125
  for(y=0;y<h;y++){
2126
    for(x=0;x<w;x++){
2127
      int r;
2128
      int g;
2129
      int b;
2130
      r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2131
      g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2132
       -2672387*v_row[x>>hdec]+447306710)/3287200;
2133
      b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2134
      rgb_row[4*x+0]=OC_CLAMP255(b);
2135
      rgb_row[4*x+1]=OC_CLAMP255(g);
2136
      rgb_row[4*x+2]=OC_CLAMP255(r);
2137
    }
2138
    y_row+=_ycbcr[0].stride;
2139
    u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2140
    v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2141
    rgb_row+=cstride;
2142
  }
2143
  /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2144
  {
2145
    cairo_t           *c;
2146
    const oc_fragment *frags;
2147
    oc_mv             *frag_mvs;
2148
    const signed char *mb_modes;
2149
    oc_mb_map         *mb_maps;
2150
    size_t             nmbs;
2151
    size_t             mbi;
2152
    int                row2;
2153
    int                col2;
2154
    int                qim[3]={0,0,0};
2155
    if(_dec->state.nqis==2){
2156
      int bqi;
2157
      bqi=_dec->state.qis[0];
2158
      if(_dec->state.qis[1]>bqi)qim[1]=1;
2159
      if(_dec->state.qis[1]<bqi)qim[1]=-1;
2160
    }
2161
    if(_dec->state.nqis==3){
2162
      int bqi;
2163
      int cqi;
2164
      int dqi;
2165
      bqi=_dec->state.qis[0];
2166
      cqi=_dec->state.qis[1];
2167
      dqi=_dec->state.qis[2];
2168
      if(cqi>bqi&&dqi>bqi){
2169
        if(dqi>cqi){
2170
          qim[1]=1;
2171
          qim[2]=2;
2172
        }
2173
        else{
2174
          qim[1]=2;
2175
          qim[2]=1;
2176
        }
2177
      }
2178
      else if(cqi<bqi&&dqi<bqi){
2179
        if(dqi<cqi){
2180
          qim[1]=-1;
2181
          qim[2]=-2;
2182
        }
2183
        else{
2184
          qim[1]=-2;
2185
          qim[2]=-1;
2186
        }
2187
      }
2188
      else{
2189
        if(cqi<bqi)qim[1]=-1;
2190
        else qim[1]=1;
2191
        if(dqi<bqi)qim[2]=-1;
2192
        else qim[2]=1;
2193
      }
2194
    }
2195
    c=cairo_create(cs);
2196
    frags=_dec->state.frags;
2197
    frag_mvs=_dec->state.frag_mvs;
2198
    mb_modes=_dec->state.mb_modes;
2199
    mb_maps=_dec->state.mb_maps;
2200
    nmbs=_dec->state.nmbs;
2201
    row2=0;
2202
    col2=0;
2203
    for(mbi=0;mbi<nmbs;mbi++){
2204
      float x;
2205
      float y;
2206
      int   bi;
2207
      y=h-(row2+((col2+1>>1)&1))*16-16;
2208
      x=(col2>>1)*16;
2209
      cairo_set_line_width(c,1.);
2210
      /*Keyframe (all intra) red box.*/
2211
      if(_dec->state.frame_type==OC_INTRA_FRAME){
2212
        if(_dec->telemetry_mbmode&0x02){
2213
          cairo_set_source_rgba(c,1.,0,0,.5);
2214
          cairo_rectangle(c,x+2.5,y+2.5,11,11);
2215
          cairo_stroke_preserve(c);
2216
          cairo_set_source_rgba(c,1.,0,0,.25);
2217
          cairo_fill(c);
2218
        }
2219
      }
2220
      else{
2221
        ptrdiff_t fragi;
2222
        int       frag_mvx;
2223
        int       frag_mvy;
2224
        for(bi=0;bi<4;bi++){
2225
          fragi=mb_maps[mbi][0][bi];
2226
          if(fragi>=0&&frags[fragi].coded){
2227
            frag_mvx=OC_MV_X(frag_mvs[fragi]);
2228
            frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2229
            break;
2230
          }
2231
        }
2232
        if(bi<4){
2233
          switch(mb_modes[mbi]){
2234
            case OC_MODE_INTRA:{
2235
              if(_dec->telemetry_mbmode&0x02){
2236
                cairo_set_source_rgba(c,1.,0,0,.5);
2237
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2238
                cairo_stroke_preserve(c);
2239
                cairo_set_source_rgba(c,1.,0,0,.25);
2240
                cairo_fill(c);
2241
              }
2242
            }break;
2243
            case OC_MODE_INTER_NOMV:{
2244
              if(_dec->telemetry_mbmode&0x01){
2245
                cairo_set_source_rgba(c,0,0,1.,.5);
2246
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2247
                cairo_stroke_preserve(c);
2248
                cairo_set_source_rgba(c,0,0,1.,.25);
2249
                cairo_fill(c);
2250
              }
2251
            }break;
2252
            case OC_MODE_INTER_MV:{
2253
              if(_dec->telemetry_mbmode&0x04){
2254
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2255
                cairo_set_source_rgba(c,0,1.,0,.5);
2256
                cairo_stroke(c);
2257
              }
2258
              if(_dec->telemetry_mv&0x04){
2259
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2260
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2261
                cairo_set_line_width(c,3.);
2262
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2263
                cairo_stroke_preserve(c);
2264
                cairo_set_line_width(c,2.);
2265
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2266
                cairo_stroke_preserve(c);
2267
                cairo_set_line_width(c,1.);
2268
                cairo_line_to(c,x+8,y+8);
2269
                cairo_stroke(c);
2270
              }
2271
            }break;
2272
            case OC_MODE_INTER_MV_LAST:{
2273
              if(_dec->telemetry_mbmode&0x08){
2274
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2275
                cairo_set_source_rgba(c,0,1.,0,.5);
2276
                cairo_move_to(c,x+13.5,y+2.5);
2277
                cairo_line_to(c,x+2.5,y+8);
2278
                cairo_line_to(c,x+13.5,y+13.5);
2279
                cairo_stroke(c);
2280
              }
2281
              if(_dec->telemetry_mv&0x08){
2282
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2283
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2284
                cairo_set_line_width(c,3.);
2285
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2286
                cairo_stroke_preserve(c);
2287
                cairo_set_line_width(c,2.);
2288
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2289
                cairo_stroke_preserve(c);
2290
                cairo_set_line_width(c,1.);
2291
                cairo_line_to(c,x+8,y+8);
2292
                cairo_stroke(c);
2293
              }
2294
            }break;
2295
            case OC_MODE_INTER_MV_LAST2:{
2296
              if(_dec->telemetry_mbmode&0x10){
2297
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2298
                cairo_set_source_rgba(c,0,1.,0,.5);
2299
                cairo_move_to(c,x+8,y+2.5);
2300
                cairo_line_to(c,x+2.5,y+8);
2301
                cairo_line_to(c,x+8,y+13.5);
2302
                cairo_move_to(c,x+13.5,y+2.5);
2303
                cairo_line_to(c,x+8,y+8);
2304
                cairo_line_to(c,x+13.5,y+13.5);
2305
                cairo_stroke(c);
2306
              }
2307
              if(_dec->telemetry_mv&0x10){
2308
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2309
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2310
                cairo_set_line_width(c,3.);
2311
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2312
                cairo_stroke_preserve(c);
2313
                cairo_set_line_width(c,2.);
2314
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2315
                cairo_stroke_preserve(c);
2316
                cairo_set_line_width(c,1.);
2317
                cairo_line_to(c,x+8,y+8);
2318
                cairo_stroke(c);
2319
              }
2320
            }break;
2321
            case OC_MODE_GOLDEN_NOMV:{
2322
              if(_dec->telemetry_mbmode&0x20){
2323
                cairo_set_source_rgba(c,1.,1.,0,.5);
2324
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2325
                cairo_stroke_preserve(c);
2326
                cairo_set_source_rgba(c,1.,1.,0,.25);
2327
                cairo_fill(c);
2328
              }
2329
            }break;
2330
            case OC_MODE_GOLDEN_MV:{
2331
              if(_dec->telemetry_mbmode&0x40){
2332
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2333
                cairo_set_source_rgba(c,1.,1.,0,.5);
2334
                cairo_stroke(c);
2335
              }
2336
              if(_dec->telemetry_mv&0x40){
2337
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2338
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2339
                cairo_set_line_width(c,3.);
2340
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2341
                cairo_stroke_preserve(c);
2342
                cairo_set_line_width(c,2.);
2343
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2344
                cairo_stroke_preserve(c);
2345
                cairo_set_line_width(c,1.);
2346
                cairo_line_to(c,x+8,y+8);
2347
                cairo_stroke(c);
2348
              }
2349
            }break;
2350
            case OC_MODE_INTER_MV_FOUR:{
2351
              if(_dec->telemetry_mbmode&0x80){
2352
                cairo_rectangle(c,x+2.5,y+2.5,4,4);
2353
                cairo_rectangle(c,x+9.5,y+2.5,4,4);
2354
                cairo_rectangle(c,x+2.5,y+9.5,4,4);
2355
                cairo_rectangle(c,x+9.5,y+9.5,4,4);
2356
                cairo_set_source_rgba(c,0,1.,0,.5);
2357
                cairo_stroke(c);
2358
              }
2359
              /*4mv is odd, coded in raster order.*/
2360
              fragi=mb_maps[mbi][0][0];
2361
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2362
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2363
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2364
                cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2365
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2366
                cairo_set_line_width(c,3.);
2367
                cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2368
                cairo_stroke_preserve(c);
2369
                cairo_set_line_width(c,2.);
2370
                cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2371
                cairo_stroke_preserve(c);
2372
                cairo_set_line_width(c,1.);
2373
                cairo_line_to(c,x+4,y+12);
2374
                cairo_stroke(c);
2375
              }
2376
              fragi=mb_maps[mbi][0][1];
2377
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2378
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2379
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2380
                cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2381
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2382
                cairo_set_line_width(c,3.);
2383
                cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2384
                cairo_stroke_preserve(c);
2385
                cairo_set_line_width(c,2.);
2386
                cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2387
                cairo_stroke_preserve(c);
2388
                cairo_set_line_width(c,1.);
2389
                cairo_line_to(c,x+12,y+12);
2390
                cairo_stroke(c);
2391
              }
2392
              fragi=mb_maps[mbi][0][2];
2393
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2394
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2395
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2396
                cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2397
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2398
                cairo_set_line_width(c,3.);
2399
                cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2400
                cairo_stroke_preserve(c);
2401
                cairo_set_line_width(c,2.);
2402
                cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2403
                cairo_stroke_preserve(c);
2404
                cairo_set_line_width(c,1.);
2405
                cairo_line_to(c,x+4,y+4);
2406
                cairo_stroke(c);
2407
              }
2408
              fragi=mb_maps[mbi][0][3];
2409
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2410
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2411
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2412
                cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2413
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2414
                cairo_set_line_width(c,3.);
2415
                cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2416
                cairo_stroke_preserve(c);
2417
                cairo_set_line_width(c,2.);
2418
                cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2419
                cairo_stroke_preserve(c);
2420
                cairo_set_line_width(c,1.);
2421
                cairo_line_to(c,x+12,y+4);
2422
                cairo_stroke(c);
2423
              }
2424
            }break;
2425
          }
2426
        }
2427
      }
2428
      /*qii illustration.*/
2429
      if(_dec->telemetry_qi&0x2){
2430
        cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2431
        for(bi=0;bi<4;bi++){
2432
          ptrdiff_t fragi;
2433
          int       qiv;
2434
          int       xp;
2435
          int       yp;
2436
          xp=x+(bi&1)*8;
2437
          yp=y+8-(bi&2)*4;
2438
          fragi=mb_maps[mbi][0][bi];
2439
          if(fragi>=0&&frags[fragi].coded){
2440
            qiv=qim[frags[fragi].qii];
2441
            cairo_set_line_width(c,3.);
2442
            cairo_set_source_rgba(c,0.,0.,0.,.5);
2443
            switch(qiv){
2444
              /*Double plus:*/
2445
              case 2:{
2446
                if((bi&1)^((bi&2)>>1)){
2447
                  cairo_move_to(c,xp+2.5,yp+1.5);
2448
                  cairo_line_to(c,xp+2.5,yp+3.5);
2449
                  cairo_move_to(c,xp+1.5,yp+2.5);
2450
                  cairo_line_to(c,xp+3.5,yp+2.5);
2451
                  cairo_move_to(c,xp+5.5,yp+4.5);
2452
                  cairo_line_to(c,xp+5.5,yp+6.5);
2453
                  cairo_move_to(c,xp+4.5,yp+5.5);
2454
                  cairo_line_to(c,xp+6.5,yp+5.5);
2455
                  cairo_stroke_preserve(c);
2456
                  cairo_set_source_rgba(c,0.,1.,1.,1.);
2457
                }
2458
                else{
2459
                  cairo_move_to(c,xp+5.5,yp+1.5);
2460
                  cairo_line_to(c,xp+5.5,yp+3.5);
2461
                  cairo_move_to(c,xp+4.5,yp+2.5);
2462
                  cairo_line_to(c,xp+6.5,yp+2.5);
2463
                  cairo_move_to(c,xp+2.5,yp+4.5);
2464
                  cairo_line_to(c,xp+2.5,yp+6.5);
2465
                  cairo_move_to(c,xp+1.5,yp+5.5);
2466
                  cairo_line_to(c,xp+3.5,yp+5.5);
2467
                  cairo_stroke_preserve(c);
2468
                  cairo_set_source_rgba(c,0.,1.,1.,1.);
2469
                }
2470
              }break;
2471
              /*Double minus:*/
2472
              case -2:{
2473
                cairo_move_to(c,xp+2.5,yp+2.5);
2474
                cairo_line_to(c,xp+5.5,yp+2.5);
2475
                cairo_move_to(c,xp+2.5,yp+5.5);
2476
                cairo_line_to(c,xp+5.5,yp+5.5);
2477
                cairo_stroke_preserve(c);
2478
                cairo_set_source_rgba(c,1.,1.,1.,1.);
2479
              }break;
2480
              /*Plus:*/
2481
              case 1:{
2482
                if((bi&2)==0)yp-=2;
2483
                if((bi&1)==0)xp-=2;
2484
                cairo_move_to(c,xp+4.5,yp+2.5);
2485
                cairo_line_to(c,xp+4.5,yp+6.5);
2486
                cairo_move_to(c,xp+2.5,yp+4.5);
2487
                cairo_line_to(c,xp+6.5,yp+4.5);
2488
                cairo_stroke_preserve(c);
2489
                cairo_set_source_rgba(c,.1,1.,.3,1.);
2490
                break;
2491
              }
2492
              /*Fall through.*/
2493
              /*Minus:*/
2494
              case -1:{
2495
                cairo_move_to(c,xp+2.5,yp+4.5);
2496
                cairo_line_to(c,xp+6.5,yp+4.5);
2497
                cairo_stroke_preserve(c);
2498
                cairo_set_source_rgba(c,1.,.3,.1,1.);
2499
              }break;
2500
              default:continue;
2501
            }
2502
            cairo_set_line_width(c,1.);
2503
            cairo_stroke(c);
2504
          }
2505
        }
2506
      }
2507
      col2++;
2508
      if((col2>>1)>=_dec->state.nhmbs){
2509
        col2=0;
2510
        row2+=2;
2511
      }
2512
    }
2513
    /*Bit usage indicator[s]:*/
2514
    if(_dec->telemetry_bits){
2515
      int widths[6];
2516
      int fpsn;
2517
      int fpsd;
2518
      int mult;
2519
      int fullw;
2520
      int padw;
2521
      int i;
2522
      fpsn=_dec->state.info.fps_numerator;
2523
      fpsd=_dec->state.info.fps_denominator;
2524
      mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2525
      fullw=250.f*h*fpsd*mult/fpsn;
2526
      padw=w-24;
2527
      /*Header and coded block bits.*/
2528
      if(_dec->telemetry_frame_bytes<0||
2529
       _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2530
        _dec->telemetry_frame_bytes=0;
2531
      }
2532
      if(_dec->telemetry_coding_bytes<0||
2533
       _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2534
        _dec->telemetry_coding_bytes=0;
2535
      }
2536
      if(_dec->telemetry_mode_bytes<0||
2537
       _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2538
        _dec->telemetry_mode_bytes=0;
2539
      }
2540
      if(_dec->telemetry_mv_bytes<0||
2541
       _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2542
        _dec->telemetry_mv_bytes=0;
2543
      }
2544
      if(_dec->telemetry_qi_bytes<0||
2545
       _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2546
        _dec->telemetry_qi_bytes=0;
2547
      }
2548
      if(_dec->telemetry_dc_bytes<0||
2549
       _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2550
        _dec->telemetry_dc_bytes=0;
2551
      }
2552
      widths[0]=padw*
2553
       (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2554
      widths[1]=padw*
2555
       (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2556
      widths[2]=padw*
2557
       (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2558
      widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2559
      widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2560
      widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2561
      for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2562
      cairo_set_source_rgba(c,.0,.0,.0,.6);
2563
      cairo_rectangle(c,10,h-33,widths[0]+1,5);
2564
      cairo_rectangle(c,10,h-29,widths[1]+1,5);
2565
      cairo_rectangle(c,10,h-25,widths[2]+1,5);
2566
      cairo_rectangle(c,10,h-21,widths[3]+1,5);
2567
      cairo_rectangle(c,10,h-17,widths[4]+1,5);
2568
      cairo_rectangle(c,10,h-13,widths[5]+1,5);
2569
      cairo_fill(c);
2570
      cairo_set_source_rgb(c,1,0,0);
2571
      cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2572
      cairo_fill(c);
2573
      cairo_set_source_rgb(c,0,1,0);
2574
      cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2575
      cairo_fill(c);
2576
      cairo_set_source_rgb(c,0,0,1);
2577
      cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2578
      cairo_fill(c);
2579
      cairo_set_source_rgb(c,.6,.4,.0);
2580
      cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2581
      cairo_fill(c);
2582
      cairo_set_source_rgb(c,.3,.3,.3);
2583
      cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2584
      cairo_fill(c);
2585
      cairo_set_source_rgb(c,.5,.5,.8);
2586
      cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2587
      cairo_fill(c);
2588
    }
2589
    /*Master qi indicator[s]:*/
2590
    if(_dec->telemetry_qi&0x1){
2591
      cairo_text_extents_t extents;
2592
      char                 buffer[10];
2593
      int                  p;
2594
      int                  y;
2595
      p=0;
2596
      y=h-7.5;
2597
      if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2598
      buffer[p++]=48+_dec->state.qis[0]%10;
2599
      if(_dec->state.nqis>=2){
2600
        buffer[p++]=' ';
2601
        if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2602
        buffer[p++]=48+_dec->state.qis[1]%10;
2603
      }
2604
      if(_dec->state.nqis==3){
2605
        buffer[p++]=' ';
2606
        if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2607
        buffer[p++]=48+_dec->state.qis[2]%10;
2608
      }
2609
      buffer[p++]='\0';
2610
      cairo_select_font_face(c,"sans",
2611
       CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2612
      cairo_set_font_size(c,18);
2613
      cairo_text_extents(c,buffer,&extents);
2614
      cairo_set_source_rgb(c,1,1,1);
2615
      cairo_move_to(c,w-extents.x_advance-10,y);
2616
      cairo_show_text(c,buffer);
2617
      cairo_set_source_rgb(c,0,0,0);
2618
      cairo_move_to(c,w-extents.x_advance-10,y);
2619
      cairo_text_path(c,buffer);
2620
      cairo_set_line_width(c,.8);
2621
      cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2622
      cairo_stroke(c);
2623
    }
2624
    cairo_destroy(c);
2625
  }
2626
  /*Out of the Cairo plane into the telemetry YUV buffer.*/
2627
  _ycbcr[0].data=_dec->telemetry_frame_data;
2628
  _ycbcr[0].stride=_ycbcr[0].width;
2629
  _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2630
  _ycbcr[1].stride=_ycbcr[1].width;
2631
  _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2632
  _ycbcr[2].stride=_ycbcr[2].width;
2633
  y_row=_ycbcr[0].data;
2634
  u_row=_ycbcr[1].data;
2635
  v_row=_ycbcr[2].data;
2636
  rgb_row=data;
2637
  /*This is one of the few places it's worth handling chroma on a
2638
     case-by-case basis.*/
2639
  switch(_dec->state.info.pixel_fmt){
2640
    case TH_PF_420:{
2641
      for(y=0;y<h;y+=2){
2642
        unsigned char *y_row2;
2643
        unsigned char *rgb_row2;
2644
        y_row2=y_row+_ycbcr[0].stride;
2645
        rgb_row2=rgb_row+cstride;
2646
        for(x=0;x<w;x+=2){
2647
          int y;
2648
          int u;
2649
          int v;
2650
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2651
           +24966*rgb_row[4*x+0]+4207500)/255000;
2652
          y_row[x]=OC_CLAMP255(y);
2653
          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2654
           +24966*rgb_row[4*x+4]+4207500)/255000;
2655
          y_row[x+1]=OC_CLAMP255(y);
2656
          y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2657
           +24966*rgb_row2[4*x+0]+4207500)/255000;
2658
          y_row2[x]=OC_CLAMP255(y);
2659
          y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2660
           +24966*rgb_row2[4*x+4]+4207500)/255000;
2661
          y_row2[x+1]=OC_CLAMP255(y);
2662
          u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2663
           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2664
           -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2665
           +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2666
           +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2667
           +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2668
          v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2669
           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2670
           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2671
            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2672
           -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2673
            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2674
          u_row[x>>1]=OC_CLAMP255(u);
2675
          v_row[x>>1]=OC_CLAMP255(v);
2676
        }
2677
        y_row+=_ycbcr[0].stride*2;
2678
        u_row+=_ycbcr[1].stride;
2679
        v_row+=_ycbcr[2].stride;
2680
        rgb_row+=cstride*2;
2681
      }
2682
    }break;
2683
    case TH_PF_422:{
2684
      for(y=0;y<h;y++){
2685
        for(x=0;x<w;x+=2){
2686
          int y;
2687
          int u;
2688
          int v;
2689
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2690
           +24966*rgb_row[4*x+0]+4207500)/255000;
2691
          y_row[x]=OC_CLAMP255(y);
2692
          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2693
           +24966*rgb_row[4*x+4]+4207500)/255000;
2694
          y_row[x+1]=OC_CLAMP255(y);
2695
          u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2696
           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2697
           +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2698
          v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2699
           -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2700
           -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2701
          u_row[x>>1]=OC_CLAMP255(u);
2702
          v_row[x>>1]=OC_CLAMP255(v);
2703
        }
2704
        y_row+=_ycbcr[0].stride;
2705
        u_row+=_ycbcr[1].stride;
2706
        v_row+=_ycbcr[2].stride;
2707
        rgb_row+=cstride;
2708
      }
2709
    }break;
2710
    /*case TH_PF_444:*/
2711
    default:{
2712
      for(y=0;y<h;y++){
2713
        for(x=0;x<w;x++){
2714
          int y;
2715
          int u;
2716
          int v;
2717
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2718
           +24966*rgb_row[4*x+0]+4207500)/255000;
2719
          u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2720
           +99232*rgb_row[4*x+0]+29032005)/225930;
2721
          v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2722
           -25536*rgb_row[4*x+0]+45940035)/357510;
2723
          y_row[x]=OC_CLAMP255(y);
2724
          u_row[x]=OC_CLAMP255(u);
2725
          v_row[x]=OC_CLAMP255(v);
2726
        }
2727
        y_row+=_ycbcr[0].stride;
2728
        u_row+=_ycbcr[1].stride;
2729
        v_row+=_ycbcr[2].stride;
2730
        rgb_row+=cstride;
2731
      }
2732
    }break;
2733
  }
2734
  /*Finished.
2735
    Destroy the surface.*/
2736
  cairo_surface_destroy(cs);
2737
}
2738
#endif
2739
2740
/*Decodes a single packet.
  The frame header is unpacked first; if ntotal_coded_fragis is then positive,
   the frame is reconstructed MCU by MCU into a free reference buffer, after
   which the reference frame indices are updated.
  _dec:     The decoder context.
  _op:      The packet to process.
            A packet with bytes==0 is handled as an inter frame with no coded
             blocks.
  _granpos: If not NULL, receives the granule position computed for this
             frame.
  Return: 0 if a frame was reconstructed.
          TH_DUPFRAME if no blocks were coded; only the granule position and
           the frame counter are updated in that case.
          TH_EFAULT if _dec or _op is NULL.
          The negative value from oc_dec_frame_header_unpack() if it fails.*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2741
8.03k
 ogg_int64_t *_granpos){
2742
8.03k
  int ret;
2743
8.03k
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
2744
  /*A completely empty packet indicates a dropped frame and is treated exactly
2745
     like an inter frame with no coded blocks.*/
2746
8.03k
  if(_op->bytes==0){
2747
11
    _dec->state.frame_type=OC_INTER_FRAME;
2748
11
    _dec->state.ntotal_coded_fragis=0;
2749
11
  }
2750
8.02k
  else{
2751
8.02k
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2752
8.02k
    ret=oc_dec_frame_header_unpack(_dec);
2753
8.02k
    if(ret<0)return ret;
2754
6.23k
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2755
402
    else oc_dec_coded_flags_unpack(_dec);
2756
6.23k
  }
2757
  /*If there have been no reference frames, and we need one, initialize one.*/
2758
6.24k
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2759
413
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2760
388
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2761
25
    oc_dec_init_dummy_frame(_dec);
2762
25
  }
2763
  /*If this was an inter frame with no coded blocks...*/
2764
6.24k
  if(_dec->state.ntotal_coded_fragis<=0){
2765
    /*Just update the granule position and return.*/
2766
11
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2767
11
     _dec->state.info.keyframe_granule_shift)
2768
11
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
2769
11
    _dec->state.curframe_num++;
2770
11
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2771
11
    return TH_DUPFRAME;
2772
11
  }
2773
6.23k
  else{
2774
6.23k
    th_ycbcr_buffer stripe_buf;
2775
6.23k
    int             stripe_fragy;
2776
6.23k
    int             refi;
2777
6.23k
    int             pli;
2778
6.23k
    int             notstart;
2779
6.23k
    int             notdone;
2780
#ifdef HAVE_CAIRO
2781
    int             telemetry;
2782
    /*Save the current telemetry state.
2783
      This prevents it from being modified in the middle of decoding this
2784
       frame, which could cause us to skip calls to the striped decoding
2785
       callback.*/
2786
    telemetry=_dec->telemetry_mbmode||_dec->telemetry_mv||
2787
     _dec->telemetry_qi||_dec->telemetry_bits;
2788
#endif
2789
    /*Select a free buffer to use for the reconstructed version of this frame.*/
2790
9.61k
    /*The loop body is empty: refi stops at the first index that matches neither
       the golden nor the previous reference frame index.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2791
6.43k
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2792
6.23k
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2793
6.23k
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
2794
6.23k
     _dec->state.ref_frame_bufs[refi][0].data;
2795
#if defined(HAVE_CAIRO)
2796
    _dec->telemetry_frame_bytes=_op->bytes;
2797
#endif
2798
6.23k
    if(_dec->state.frame_type==OC_INTRA_FRAME){
2799
5.82k
      _dec->state.keyframe_num=_dec->state.curframe_num;
2800
#if defined(HAVE_CAIRO)
2801
      _dec->telemetry_coding_bytes=
2802
       _dec->telemetry_mode_bytes=
2803
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2804
#endif
2805
5.82k
    }
2806
402
    else{
2807
#if defined(HAVE_CAIRO)
2808
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2809
#endif
2810
402
      oc_dec_mb_modes_unpack(_dec);
2811
#if defined(HAVE_CAIRO)
2812
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2813
#endif
2814
402
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2815
#if defined(HAVE_CAIRO)
2816
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2817
#endif
2818
402
    }
2819
6.23k
    oc_dec_block_qis_unpack(_dec);
2820
#if defined(HAVE_CAIRO)
2821
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2822
#endif
2823
6.23k
    oc_dec_residual_tokens_unpack(_dec);
2824
    /*Update granule position.
2825
      This must be done before the striped decode callbacks so that the
2826
       application knows what to do with the frame data.*/
2827
6.23k
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2828
6.23k
     _dec->state.info.keyframe_granule_shift)
2829
6.23k
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
2830
6.23k
    _dec->state.curframe_num++;
2831
6.23k
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2832
    /*All of the rest of the operations -- DC prediction reversal,
2833
       reconstructing coded fragments, copying uncoded fragments, loop
2834
       filtering, extending borders, and out-of-loop post-processing -- should
2835
       be pipelined.
2836
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
2837
       copying are done for one or two super block rows, then loop filtering is
2838
       run as far as it can, then bordering copying, then post-processing.
2839
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2840
       block rows, and one chroma.
2841
      Otherwise, an MCU consists of one super block row from each plane.
2842
      Inside each MCU, we perform all of the steps on one color plane before
2843
       moving on to the next.
2844
      After reconstruction, the additional filtering stages introduce a delay
2845
       since they need some pixels from the next fragment row.
2846
      Thus the actual number of decoded rows available is slightly smaller for
2847
       the first MCU, and slightly larger for the last.
2848
2849
      This entire process allows us to operate on the data while it is still in
2850
       cache, resulting in big performance improvements.
2851
      An application callback allows further application processing (blitting
2852
       to video memory, color conversion, etc.) to also use the data while it's
2853
       in cache.*/
2854
6.23k
    oc_dec_pipeline_init(_dec,&_dec->pipe);
2855
6.23k
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2856
6.23k
    /*notstart stays 0 for the first MCU only and notdone drops to 0 on the last;
       both are added to sdelay/edelay below as per-stage row delays.*/
    notstart=0;
2857
6.23k
    notdone=1;
2858
34.5k
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2859
28.3k
      int avail_fragy0;
2860
28.3k
      int avail_fragy_end;
2861
28.3k
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2862
28.3k
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2863
113k
      for(pli=0;pli<3;pli++){
2864
84.9k
        oc_fragment_plane *fplane;
2865
84.9k
        int                frag_shift;
2866
84.9k
        int                pp_offset;
2867
84.9k
        int                sdelay;
2868
84.9k
        int                edelay;
2869
84.9k
        fplane=_dec->state.fplanes+pli;
2870
        /*Compute the first and last fragment row of the current MCU for this
2871
           plane.*/
2872
84.9k
        /*frag_shift is 1 only for a chroma plane (pli!=0) when bit 1 of pixel_fmt
           is clear, and 0 otherwise.
          NOTE(review): presumably that bit being clear means chroma is vertically
           sub-sampled; confirm against the th_pixel_fmt definitions.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2873
84.9k
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2874
84.9k
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2875
84.9k
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2876
84.9k
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2877
84.9k
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2878
84.9k
        sdelay=edelay=0;
2879
84.9k
        if(_dec->pipe.loop_filter){
2880
76.5k
          sdelay+=notstart;
2881
76.5k
          edelay+=notdone;
2882
76.5k
          oc_state_loop_filter_frag_rows(&_dec->state,
2883
76.5k
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
2884
76.5k
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2885
76.5k
        }
2886
        /*To fill the borders, we have an additional two pixel delay, since a
2887
           fragment in the next row could filter its top edge, using two pixels
2888
           from a fragment in this row.
2889
          But there's no reason to delay a full fragment between the two.*/
2890
84.9k
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
2891
84.9k
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2892
84.9k
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2893
        /*Out-of-loop post-processing.*/
2894
84.9k
        pp_offset=3*(pli!=0);
2895
84.9k
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2896
          /*Perform de-blocking in one plane.*/
2897
0
          sdelay+=notstart;
2898
0
          edelay+=notdone;
2899
0
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2900
0
           _dec->state.ref_frame_bufs[refi],pli,
2901
0
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2902
0
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2903
            /*Perform de-ringing in one plane.*/
2904
0
            sdelay+=notstart;
2905
0
            edelay+=notdone;
2906
0
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2907
0
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2908
0
          }
2909
0
        }
2910
        /*If no post-processing is done, we still need to delay a row for the
2911
           loop filter, thanks to the strange filtering order VP3 chose.*/
2912
84.9k
        else if(_dec->pipe.loop_filter){
2913
76.5k
          sdelay+=notstart;
2914
76.5k
          edelay+=notdone;
2915
76.5k
        }
2916
        /*Compute the intersection of the available rows in all planes.
2917
          If chroma is sub-sampled, the effect of each of its delays is
2918
           doubled, but luma might have more post-processing filters enabled
2919
           than chroma, so we don't know up front which one is the limiting
2920
           factor.*/
2921
84.9k
        avail_fragy0=OC_MINI(avail_fragy0,
2922
84.9k
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2923
84.9k
        avail_fragy_end=OC_MINI(avail_fragy_end,
2924
84.9k
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2925
84.9k
      }
2926
      /*Exactly one of the two if-headers below is compiled, and both share the
         single closing brace that follows.
        With HAVE_CAIRO the per-stripe callback is also skipped while telemetry
         is active; it is then made once for the whole frame after rendering.*/
#ifdef HAVE_CAIRO
2927
      if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
2928
#else
2929
28.3k
      if(_dec->stripe_cb.stripe_decoded!=NULL){
2930
0
#endif
2931
        /*The callback might want to use the FPU, so let's make sure they can.
2932
          We violate all kinds of ABI restrictions by not doing this until
2933
           now, but none of them actually matter since we don't use floating
2934
           point ourselves.*/
2935
0
        oc_restore_fpu(&_dec->state);
2936
        /*Make the callback, ensuring we flip the sense of the "start" and
2937
           "end" of the available region upside down.*/
2938
0
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2939
0
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2940
0
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
2941
0
      }
2942
28.3k
      notstart=1;
2943
28.3k
    }
2944
    /*Finish filling in the reference frame borders.*/
2945
24.9k
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2946
    /*Update the reference frame indices.*/
2947
6.23k
    if(_dec->state.frame_type==OC_INTRA_FRAME){
2948
      /*The new frame becomes both the previous and gold reference frames.*/
2949
5.82k
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2950
5.82k
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2951
5.82k
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
2952
5.82k
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2953
5.82k
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
2954
5.82k
       _dec->state.ref_frame_data[OC_FRAME_SELF];
2955
5.82k
    }
2956
402
    else{
2957
      /*Otherwise, just replace the previous reference frame.*/
2958
402
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2959
402
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
2960
402
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
2961
402
       _dec->state.ref_frame_data[OC_FRAME_SELF];
2962
402
    }
2963
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2964
       gamma values, if nothing else).*/
2965
6.23k
    oc_restore_fpu(&_dec->state);
2966
#ifdef HAVE_CAIRO
2967
    /*If telemetry ioctls are active, we need to draw to the output buffer.*/
2968
    if(telemetry){
2969
      oc_render_telemetry(_dec,stripe_buf,telemetry);
2970
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
2971
      /*If we had a striped decoding callback, we skipped calling it above
2972
         (because the telemetry wasn't rendered yet).
2973
        Call it now with the whole frame.*/
2974
      if(_dec->stripe_cb.stripe_decoded!=NULL){
2975
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
2976
         stripe_buf,0,_dec->state.fplanes[0].nvfrags);
2977
      }
2978
    }
2979
#endif
2980
#if defined(OC_DUMP_IMAGES)
2981
    /*We only dump images if there were some coded blocks.*/
2982
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2983
#endif
2984
6.23k
    return 0;
2985
6.23k
  }
2986
6.24k
}
2987
2988
6.18k
/*Hands the decoder's current output frame description to the caller.
  The decoder's pp_frame_buf is passed through oc_ycbcr_buffer_flip() into
   _ycbcr.
  _dec:   The decoder context.
  _ycbcr: The buffer description to fill in.
  Return: 0 on success, or TH_EFAULT if _dec or _ycbcr is NULL.*/
int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
  if(_dec!=NULL&&_ycbcr!=NULL){
    oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
    return 0;
  }
  /*At least one required argument was missing.*/
  return TH_EFAULT;
}