Coverage Report

Created: 2025-07-18 06:42

/src/libtheora/lib/decode.c
Line
Count
Source (jump to first uncovered line)
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009,2025           *
9
 * by the Xiph.Org Foundation and contributors                      *
10
 * https://www.xiph.org/                                            *
11
 *                                                                  *
12
 ********************************************************************
13
14
  function:
15
16
 ********************************************************************/
17
18
#include <stdlib.h>
19
#include <string.h>
20
#include <ogg/ogg.h>
21
#include "decint.h"
22
#if defined(OC_DUMP_IMAGES)
23
# include <stdio.h>
24
# include "png.h"
25
#endif
26
#if defined(HAVE_CAIRO)
27
# include <cairo.h>
28
#endif
29
30
31
/*Post-processing levels.*/

/*Post-processing is turned off.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Only the DC qi of each block is tracked.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*The luma plane is deblocked.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*The luma plane is deringed.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*The luma plane is deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*The chroma planes are deblocked.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*The chroma planes are deringed.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*The chroma planes are deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*The largest valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
52
/*The mode alphabets for the various mode coding schemes.
53
  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
54
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55
  /*Last MV dominates */
56
  {
57
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59
    OC_MODE_INTER_MV_FOUR
60
  },
61
  {
62
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64
    OC_MODE_INTER_MV_FOUR
65
  },
66
  {
67
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69
    OC_MODE_INTER_MV_FOUR
70
  },
71
  {
72
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75
  },
76
  /*No MV dominates.*/
77
  {
78
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80
    OC_MODE_INTER_MV_FOUR
81
  },
82
  {
83
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85
    OC_MODE_INTER_MV_FOUR
86
  },
87
  /*Default ordering.*/
88
  {
89
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91
    OC_MODE_INTER_MV_FOUR
92
  }
93
};
94
95
96
/*The original DCT tokens are extended and reordered during the construction of
97
   the Huffman tables.
98
  The extension means more bits can be read with fewer calls to the bitpacker
99
   during the Huffman decoding process (at the cost of larger Huffman tables),
100
   and fewer tokens require additional extra bits (reducing the average storage
101
   per decoded token).
102
  The revised ordering reveals essential information in the token value
103
   itself; specifically, whether or not there are additional extra bits to read
104
   and the parameter to which those extra bits are applied.
105
  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106
  The extra bits are added into code word at the bit position inferred from the
107
   token value, giving the final code word from which all required parameters
108
   are derived.
109
  The number of EOBs and the leading zero run length can be extracted directly.
110
  The coefficient magnitude is optionally negated before extraction, according
111
   to a 'flip' bit.*/
112
113
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the internal tokens that need extra bits have an entry; they occupy the
   lowest token values.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token is a valid index into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that a compound expression (e.g., a|b) is
   compared as a whole instead of being split by operator precedence.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 ((token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123
124
/*The internal token value of OC_DCT_REPEAT_RUN3_TOKEN, which requires more
   than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  This is all the bits of a size_t set except the top one.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131
132
/*Layout of a DCT code word, from the least significant bit upwards:
   run length, EOB count, flip bit, magnitude.*/

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  The magnitude is stored with the flip bit subtracted from it, and is
   positioned with a multiply instead of a shift so that negative magnitudes
   are never left-shifted.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 (((_eobs)<<OC_DCT_CW_EOB_SHIFT)| \
 ((_rlen)<<OC_DCT_CW_RLEN_SHIFT)| \
 ((_flip)<<OC_DCT_CW_FLIP_BIT)| \
 (((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT)))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word:
   OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, OC_DCT_CW_MAG_SHIFT for tokens 2
   through 11, and 0 (OC_DCT_CW_RLEN_SHIFT) for everything else.
  We use this branch-free formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
168
/*The code words for each internal token.
169
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170
   order.*/
171
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172
  /*These tokens require additional extra bits for the EOB count.*/
173
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174
  OC_DCT_CW_FINISH,
175
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176
  OC_DCT_CW_PACK(16, 0,  0,0),
177
  /*These tokens require additional extra bits for the magnitude.*/
178
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179
  OC_DCT_CW_PACK( 0, 0, 13,0),
180
  OC_DCT_CW_PACK( 0, 0, 13,1),
181
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182
  OC_DCT_CW_PACK( 0, 0, 21,0),
183
  OC_DCT_CW_PACK( 0, 0, 21,1),
184
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185
  OC_DCT_CW_PACK( 0, 0, 37,0),
186
  OC_DCT_CW_PACK( 0, 0, 37,1),
187
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188
  OC_DCT_CW_PACK( 0, 0, 69,0),
189
  OC_DCT_CW_PACK( 0, 0,325,0),
190
  OC_DCT_CW_PACK( 0, 0, 69,1),
191
  OC_DCT_CW_PACK( 0, 0,325,1),
192
  /*These tokens require additional extra bits for the run length.*/
193
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194
  OC_DCT_CW_PACK( 0,10, +1,0),
195
  OC_DCT_CW_PACK( 0,10, -1,0),
196
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
197
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198
  OC_DCT_CW_PACK( 0, 0,  0,1),
199
  /*The remaining tokens require no additional extra bits.*/
200
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201
  OC_DCT_CW_PACK( 1, 0,  0,0),
202
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203
  OC_DCT_CW_PACK( 2, 0,  0,0),
204
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205
  OC_DCT_CW_PACK( 3, 0,  0,0),
206
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207
  OC_DCT_CW_PACK( 0, 1, +1,0),
208
  OC_DCT_CW_PACK( 0, 1, -1,0),
209
  OC_DCT_CW_PACK( 0, 2, +1,0),
210
  OC_DCT_CW_PACK( 0, 2, -1,0),
211
  OC_DCT_CW_PACK( 0, 3, +1,0),
212
  OC_DCT_CW_PACK( 0, 3, -1,0),
213
  OC_DCT_CW_PACK( 0, 4, +1,0),
214
  OC_DCT_CW_PACK( 0, 4, -1,0),
215
  OC_DCT_CW_PACK( 0, 5, +1,0),
216
  OC_DCT_CW_PACK( 0, 5, -1,0),
217
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218
  OC_DCT_CW_PACK( 0, 1, +2,0),
219
  OC_DCT_CW_PACK( 0, 1, +3,0),
220
  OC_DCT_CW_PACK( 0, 1, -2,0),
221
  OC_DCT_CW_PACK( 0, 1, -3,0),
222
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223
  OC_DCT_CW_PACK( 0, 6, +1,0),
224
  OC_DCT_CW_PACK( 0, 7, +1,0),
225
  OC_DCT_CW_PACK( 0, 8, +1,0),
226
  OC_DCT_CW_PACK( 0, 9, +1,0),
227
  OC_DCT_CW_PACK( 0, 6, -1,0),
228
  OC_DCT_CW_PACK( 0, 7, -1,0),
229
  OC_DCT_CW_PACK( 0, 8, -1,0),
230
  OC_DCT_CW_PACK( 0, 9, -1,0),
231
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232
  OC_DCT_CW_PACK( 0, 2, +2,0),
233
  OC_DCT_CW_PACK( 0, 3, +2,0),
234
  OC_DCT_CW_PACK( 0, 2, +3,0),
235
  OC_DCT_CW_PACK( 0, 3, +3,0),
236
  OC_DCT_CW_PACK( 0, 2, -2,0),
237
  OC_DCT_CW_PACK( 0, 3, -2,0),
238
  OC_DCT_CW_PACK( 0, 2, -3,0),
239
  OC_DCT_CW_PACK( 0, 3, -3,0),
240
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242
  OC_DCT_CW_PACK( 0, 0,  0,1),
243
  OC_DCT_CW_PACK( 0, 1,  0,0),
244
  OC_DCT_CW_PACK( 0, 2,  0,0),
245
  OC_DCT_CW_PACK( 0, 3,  0,0),
246
  OC_DCT_CW_PACK( 0, 4,  0,0),
247
  OC_DCT_CW_PACK( 0, 5,  0,0),
248
  OC_DCT_CW_PACK( 0, 6,  0,0),
249
  OC_DCT_CW_PACK( 0, 7,  0,0),
250
  /*OC_ONE_TOKEN (0 extra bits)*/
251
  OC_DCT_CW_PACK( 0, 0, +1,0),
252
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253
  OC_DCT_CW_PACK( 0, 0, -1,0),
254
  /*OC_TWO_TOKEN (0 extra bits)*/
255
  OC_DCT_CW_PACK( 0, 0, +2,0),
256
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257
  OC_DCT_CW_PACK( 0, 0, -2,0),
258
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259
  OC_DCT_CW_PACK( 0, 0, +3,0),
260
  OC_DCT_CW_PACK( 0, 0, -3,0),
261
  OC_DCT_CW_PACK( 0, 0, +4,0),
262
  OC_DCT_CW_PACK( 0, 0, -4,0),
263
  OC_DCT_CW_PACK( 0, 0, +5,0),
264
  OC_DCT_CW_PACK( 0, 0, -5,0),
265
  OC_DCT_CW_PACK( 0, 0, +6,0),
266
  OC_DCT_CW_PACK( 0, 0, -6,0),
267
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268
  OC_DCT_CW_PACK( 0, 0, +7,0),
269
  OC_DCT_CW_PACK( 0, 0, +8,0),
270
  OC_DCT_CW_PACK( 0, 0, -7,0),
271
  OC_DCT_CW_PACK( 0, 0, -8,0),
272
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273
  OC_DCT_CW_PACK( 0, 0, +9,0),
274
  OC_DCT_CW_PACK( 0, 0,+10,0),
275
  OC_DCT_CW_PACK( 0, 0,+11,0),
276
  OC_DCT_CW_PACK( 0, 0,+12,0),
277
  OC_DCT_CW_PACK( 0, 0, -9,0),
278
  OC_DCT_CW_PACK( 0, 0,-10,0),
279
  OC_DCT_CW_PACK( 0, 0,-11,0),
280
  OC_DCT_CW_PACK( 0, 0,-12,0),
281
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282
  OC_DCT_CW_PACK( 8, 0,  0,0),
283
  OC_DCT_CW_PACK( 9, 0,  0,0),
284
  OC_DCT_CW_PACK(10, 0,  0,0),
285
  OC_DCT_CW_PACK(11, 0,  0,0),
286
  OC_DCT_CW_PACK(12, 0,  0,0),
287
  OC_DCT_CW_PACK(13, 0,  0,0),
288
  OC_DCT_CW_PACK(14, 0,  0,0),
289
  OC_DCT_CW_PACK(15, 0,  0,0),
290
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291
  OC_DCT_CW_PACK( 4, 0,  0,0),
292
  OC_DCT_CW_PACK( 5, 0,  0,0),
293
  OC_DCT_CW_PACK( 6, 0,  0,0),
294
  OC_DCT_CW_PACK( 7, 0,  0,0),
295
};
296
297
298
299
1.81M
/*Decodes one super block run length from the packet.
  _opb:   The pack buffer to read from.
  Return: The decoded run length (1 to 4129 under the coding scheme below).*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
      2,
       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int base;
  int nbits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Small token values are the run length itself.*/
  if(token<0x10)return token;
  /*Otherwise the low 5 bits hold the base run length (less 6), and the bits
     above them hold the number of extra bits still to be read.*/
  base=token&0x1F;
  nbits=(token-base)>>4;
  return 6+base+(int)oc_pack_read(_opb,nbits);
}
327
328
2.99M
/*Decodes one block run length from the packet.
  _opb:   The pack buffer to read from.
  Return: The decoded run length (1 to 30 under the coding scheme below).*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
     33,       36,       39,       44,
      1,-(1<<8|7),-(1<<8|8),
      1,-(1<<8|9),-(1<<8|10),
      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
      4,
       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  int run_length;
  /*Every leaf of the tree is a complete run length; there are no extra bits
     to read afterwards.*/
  run_length=oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
  return run_length;
}
358
359
360
361
1.08k
/*Selects the plain C implementations of the decoder's replaceable functions.
  When OC_DEC_USE_VTABLE is defined, this stores
   oc_dec_dc_unpredict_mcu_plane_c in the decoder's vtable; otherwise it does
   nothing.
  _dec: The decoder context to set up.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
#if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c;
#endif
}
367
368
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
369
1.08k
 const th_setup_info *_setup){
370
1.08k
  int qti;
371
1.08k
  int pli;
372
1.08k
  int qi;
373
1.08k
  int ret;
374
1.08k
  ret=oc_state_init(&_dec->state,_info,3);
375
1.08k
  if(ret<0)return ret;
376
1.08k
  ret=oc_huff_trees_copy(_dec->huff_tables,
377
1.08k
   (const ogg_int16_t *const *)_setup->huff_tables);
378
1.08k
  if(ret<0){
379
0
    oc_state_clear(&_dec->state);
380
0
    return ret;
381
0
  }
382
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
383
     one byte for extra-bits for each token, plus one more byte for the long
384
     EOB run, just in case it's the very last token and has a run length of
385
     one.*/
386
1.08k
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
387
1.08k
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
388
1.08k
  if(_dec->dct_tokens==NULL){
389
0
    oc_huff_trees_clear(_dec->huff_tables);
390
0
    oc_state_clear(&_dec->state);
391
0
    return TH_EFAULT;
392
0
  }
393
623k
  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
394
415k
    _dec->state.dequant_tables[qi][pli][qti]=
395
415k
     _dec->state.dequant_table_data[qi][pli][qti];
396
415k
  }
397
1.08k
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
398
1.08k
   &_setup->qinfo);
399
70.3k
  for(qi=0;qi<64;qi++){
400
69.2k
    int qsum;
401
69.2k
    qsum=0;
402
553k
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
403
415k
      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
404
415k
       _dec->state.dequant_tables[qi][pli][qti][17]+
405
415k
       _dec->state.dequant_tables[qi][pli][qti][18]+
406
415k
       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
407
415k
    }
408
69.2k
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
409
69.2k
  }
410
1.08k
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
411
1.08k
   sizeof(_dec->state.loop_filter_limits));
412
1.08k
  oc_dec_accel_init(_dec);
413
1.08k
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
414
1.08k
  _dec->dc_qis=NULL;
415
1.08k
  _dec->variances=NULL;
416
1.08k
  _dec->pp_frame_data=NULL;
417
1.08k
  _dec->stripe_cb.ctx=NULL;
418
1.08k
  _dec->stripe_cb.stripe_decoded=NULL;
419
#if defined(HAVE_CAIRO)
420
  _dec->telemetry_bits=0;
421
  _dec->telemetry_qi=0;
422
  _dec->telemetry_mbmode=0;
423
  _dec->telemetry_mv=0;
424
  _dec->telemetry_frame_data=NULL;
425
#endif
426
1.08k
  return 0;
427
1.08k
}
428
429
1.08k
/*Frees the buffers held by a decoder context, then clears its Huffman tables
   and its common codec state.
  _dec: The decoder context to tear down.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  /*The telemetry frame buffer only exists in cairo-enabled builds.*/
  _ogg_free(_dec->telemetry_frame_data);
#endif
  /*Post-processing buffers.*/
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  /*The DCT token buffer.*/
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
440
441
442
1.13k
/*Reads the frame header from the decoder's pack buffer.
  This stores the frame type and the list of qi values (and their count) in
   the decoder state.
  _dec:   The decoder context; its pack buffer must already be positioned at
           the start of the packet.
  Return: 0 on success, TH_EBADPACKET if the packet is not marked as a data
           packet, or TH_EIMPL if a keyframe's three configuration bits are
           not all zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long val;
  int  nqis;
  /*Check to make sure this is a data packet.*/
  val=oc_pack_read1(&_dec->opb);
  if(val!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  val=oc_pack_read1(&_dec->opb);
  _dec->state.frame_type=(int)val;
  /*Read in the qi list: up to three 6-bit values.
    After each of the first two, a 1-bit flag says whether another follows.*/
  nqis=0;
  for(;;){
    val=oc_pack_read(&_dec->opb,6);
    _dec->state.qis[nqis++]=(unsigned char)val;
    if(nqis>=3)break;
    val=oc_pack_read1(&_dec->opb);
    if(!val)break;
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type!=OC_INTRA_FRAME)return 0;
  /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
    Most of the other unused bits in the VP3 headers were eliminated.
    I don't know why these remain.*/
  /*I wanted to eliminate wasted bits, but not all config wiggle room
     --Monty.*/
  val=oc_pack_read(&_dec->opb,3);
  if(val!=0)return TH_EIMPL;
  return 0;
}
477
478
/*Mark all fragments as coded and in OC_MODE_INTRA.
479
  This also builds up the coded fragment list (in coded order), and clears the
480
   uncoded fragment list.
481
  It does not update the coded macro block list nor the super block flags, as
482
   those are not used when decoding INTRA frames.*/
483
418
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
484
418
  const oc_sb_map   *sb_maps;
485
418
  const oc_sb_flags *sb_flags;
486
418
  oc_fragment       *frags;
487
418
  ptrdiff_t         *coded_fragis;
488
418
  ptrdiff_t          ncoded_fragis;
489
418
  ptrdiff_t          prev_ncoded_fragis;
490
418
  unsigned           nsbs;
491
418
  unsigned           sbi;
492
418
  int                pli;
493
418
  coded_fragis=_dec->state.coded_fragis;
494
418
  prev_ncoded_fragis=ncoded_fragis=0;
495
418
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
496
418
  sb_flags=_dec->state.sb_flags;
497
418
  frags=_dec->state.frags;
498
418
  sbi=nsbs=0;
499
1.67k
  for(pli=0;pli<3;pli++){
500
1.25k
    nsbs+=_dec->state.fplanes[pli].nsbs;
501
390k
    for(;sbi<nsbs;sbi++){
502
389k
      int quadi;
503
1.94M
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
504
1.52M
        int bi;
505
7.62M
        for(bi=0;bi<4;bi++){
506
6.10M
          ptrdiff_t fragi;
507
6.10M
          fragi=sb_maps[sbi][quadi][bi];
508
6.10M
          if(fragi>=0){
509
6.07M
            frags[fragi].coded=1;
510
6.07M
            frags[fragi].refi=OC_FRAME_SELF;
511
6.07M
            frags[fragi].mb_mode=OC_MODE_INTRA;
512
6.07M
            coded_fragis[ncoded_fragis++]=fragi;
513
6.07M
          }
514
6.10M
        }
515
1.52M
      }
516
389k
    }
517
1.25k
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
518
1.25k
    prev_ncoded_fragis=ncoded_fragis;
519
1.25k
  }
520
418
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
521
418
}
522
523
/*Decodes the bit flags indicating whether each super block is partially coded
524
   or not.
525
  Return: The number of partially coded super blocks.*/
526
711
static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
527
711
  oc_sb_flags *sb_flags;
528
711
  unsigned     nsbs;
529
711
  unsigned     sbi;
530
711
  unsigned     npartial;
531
711
  unsigned     run_count;
532
711
  long         val;
533
711
  int          flag;
534
711
  val=oc_pack_read1(&_dec->opb);
535
711
  flag=(int)val;
536
711
  sb_flags=_dec->state.sb_flags;
537
711
  nsbs=_dec->state.nsbs;
538
711
  sbi=npartial=0;
539
319k
  while(sbi<nsbs){
540
318k
    int full_run;
541
318k
    run_count=oc_sb_run_unpack(&_dec->opb);
542
318k
    full_run=run_count>=4129;
543
414k
    do{
544
414k
      sb_flags[sbi].coded_partially=flag;
545
414k
      sb_flags[sbi].coded_fully=0;
546
414k
      npartial+=flag;
547
414k
      sbi++;
548
414k
    }
549
414k
    while(--run_count>0&&sbi<nsbs);
550
318k
    if(full_run&&sbi<nsbs){
551
0
      val=oc_pack_read1(&_dec->opb);
552
0
      flag=(int)val;
553
0
    }
554
318k
    else flag=!flag;
555
318k
  }
556
  /*TODO: run_count should be 0 here.
557
    If it's not, we should issue a warning of some kind.*/
558
711
  return npartial;
559
711
}
560
561
/*Decodes the bit flags for whether or not each non-partially-coded super
562
   block is fully coded or not.
563
  This function should only be called if there is at least one
564
   non-partially-coded super block.
565
  Return: The number of partially coded super blocks.*/
566
675
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
567
675
  oc_sb_flags *sb_flags;
568
675
  unsigned     nsbs;
569
675
  unsigned     sbi;
570
675
  unsigned     run_count;
571
675
  long         val;
572
675
  int          flag;
573
675
  sb_flags=_dec->state.sb_flags;
574
675
  nsbs=_dec->state.nsbs;
575
  /*Skip partially coded super blocks.*/
576
5.67k
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
577
675
  val=oc_pack_read1(&_dec->opb);
578
675
  flag=(int)val;
579
172k
  do{
580
172k
    int full_run;
581
172k
    run_count=oc_sb_run_unpack(&_dec->opb);
582
172k
    full_run=run_count>=4129;
583
565k
    for(;sbi<nsbs;sbi++){
584
565k
      if(sb_flags[sbi].coded_partially)continue;
585
384k
      if(run_count--<=0)break;
586
212k
      sb_flags[sbi].coded_fully=flag;
587
212k
    }
588
172k
    if(full_run&&sbi<nsbs){
589
0
      val=oc_pack_read1(&_dec->opb);
590
0
      flag=(int)val;
591
0
    }
592
172k
    else flag=!flag;
593
172k
  }
594
172k
  while(sbi<nsbs);
595
  /*TODO: run_count should be 0 here.
596
    If it's not, we should issue a warning of some kind.*/
597
675
}
598
599
711
/*Unpacks the coded/uncoded flag of every fragment.
  The super block flags are decoded first; the blocks of partially coded super
   blocks then have their flags read as run-length coded bits.
  Coded fragments are appended to the coded fragment list, and uncoded ones
   are stored in a second list that grows downwards from the end of the same
   array (coded_fragis+nfrags).
  For the luma plane, mb_modes also records for each macro block whether or
   not it contains any coded block.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_fragis;
  ptrdiff_t          ncoded;
  ptrdiff_t          nuncoded;
  ptrdiff_t          plane_start;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  long               val;
  int                run_count;
  int                flag;
  int                pli;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The initial block flag is only present if some super block is partially
     coded.
    It is stored inverted, since it is toggled before the first run is
     used.*/
  if(npartial>0){
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  else flag=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  ncoded=0;
  nuncoded=0;
  run_count=0;
  sbi=0;
  sb_end=0;
  for(pli=0;pli<3;pli++){
    plane_start=ncoded;
    sb_end+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<sb_end;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int quad_coded;
        int bi;
        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       coded;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi<0)continue;
          if(sb_flags[sbi].coded_fully)coded=1;
          else if(sb_flags[sbi].coded_partially){
            /*Start a new run, with the opposite flag, once the current one
               is used up.*/
            if(run_count<=0){
              run_count=oc_block_run_unpack(&_dec->opb);
              flag=!flag;
            }
            run_count--;
            coded=flag;
          }
          else coded=0;
          if(coded)coded_fragis[ncoded++]=fragi;
          else{
            nuncoded++;
            uncoded_fragis[-nuncoded]=fragi;
          }
          quad_coded|=coded;
          frags[fragi].coded=coded;
          frags[fragi].refi=OC_FRAME_NONE;
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(pli==0)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
672
673
674
/*Coding scheme:
675
   Codeword            Mode Index
676
   0                       0
677
   10                      1
678
   110                     2
679
   1110                    3
680
   11110                   4
681
   111110                  5
682
   1111110                 6
683
   1111111                 7*/
684
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
685
  4,
686
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
687
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688
   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
689
   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
690
    3,
691
     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
692
     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
693
};
694
695
/*The Huffman tree for constant-length coded macro block mode indices: every
   3-bit codeword maps to the mode index with the same value.*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};
700
701
/*Unpacks the list of macro block modes for INTER frames.*/
702
710
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
703
710
  signed char         *mb_modes;
704
710
  const unsigned char *alphabet;
705
710
  unsigned char        scheme0_alphabet[8];
706
710
  const ogg_int16_t   *mode_tree;
707
710
  size_t               nmbs;
708
710
  size_t               mbi;
709
710
  long                 val;
710
710
  int                  mode_scheme;
711
710
  val=oc_pack_read(&_dec->opb,3);
712
710
  mode_scheme=(int)val;
713
710
  if(mode_scheme==0){
714
632
    int mi;
715
    /*Just in case, initialize the modes to something.
716
      If the bitstream doesn't contain each index exactly once, it's likely
717
       corrupt and the rest of the packet is garbage anyway, but this way we
718
       won't crash, and we'll decode SOMETHING.*/
719
    /*LOOP VECTORIZES*/
720
5.68k
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
721
5.68k
    for(mi=0;mi<OC_NMODES;mi++){
722
5.05k
      val=oc_pack_read(&_dec->opb,3);
723
5.05k
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
724
5.05k
    }
725
632
    alphabet=scheme0_alphabet;
726
632
  }
727
78
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
728
710
  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
729
710
  mb_modes=_dec->state.mb_modes;
730
710
  nmbs=_dec->state.nmbs;
731
739k
  for(mbi=0;mbi<nmbs;mbi++){
732
738k
    if(mb_modes[mbi]>0){
733
      /*We have a coded luma block; decode a mode.*/
734
544k
      mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
735
544k
    }
736
    /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
737
       fact that OC_MODE_INTER_NOMV is already 0.*/
738
738k
  }
739
710
}
740
741
742
743
/*The Huffman tree for variable-length coded motion vector components.
  Each leaf stores the component value plus 32, covering -31 through +31;
   oc_mv_unpack() removes the bias after decoding.*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
   33,          36,          39,          42,
   45,          50,          55,          60,
   65,          74,          83,          92,
    1,-(1<<8|32+4),-(1<<8|32-4),
    1,-(1<<8|32+5),-(1<<8|32-5),
    1,-(1<<8|32+6),-(1<<8|32-6),
    1,-(1<<8|32+7),-(1<<8|32-7),
    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
    3,
     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
    3,
     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
    3,
     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
    3,
     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};
774
775
/*Lookup tree for the fixed-length motion vector component code.
  The table is a single node: a bit count of 6 followed by 64 leaves.
  Each leaf is stored as -(6<<8|value), where value is the MV component
   biased by 32 (oc_mv_unpack() removes the bias).
  Leaves come in (+m,-m) pairs for m=0...31, so both +0 and -0 map to 32.*/
#define OC_MVC_LEAF(_mv) (-((6<<8)|(32+(_mv))))
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
   OC_MVC_LEAF( +0),OC_MVC_LEAF( -0),OC_MVC_LEAF( +1),OC_MVC_LEAF( -1),
   OC_MVC_LEAF( +2),OC_MVC_LEAF( -2),OC_MVC_LEAF( +3),OC_MVC_LEAF( -3),
   OC_MVC_LEAF( +4),OC_MVC_LEAF( -4),OC_MVC_LEAF( +5),OC_MVC_LEAF( -5),
   OC_MVC_LEAF( +6),OC_MVC_LEAF( -6),OC_MVC_LEAF( +7),OC_MVC_LEAF( -7),
   OC_MVC_LEAF( +8),OC_MVC_LEAF( -8),OC_MVC_LEAF( +9),OC_MVC_LEAF( -9),
   OC_MVC_LEAF(+10),OC_MVC_LEAF(-10),OC_MVC_LEAF(+11),OC_MVC_LEAF(-11),
   OC_MVC_LEAF(+12),OC_MVC_LEAF(-12),OC_MVC_LEAF(+13),OC_MVC_LEAF(-13),
   OC_MVC_LEAF(+14),OC_MVC_LEAF(-14),OC_MVC_LEAF(+15),OC_MVC_LEAF(-15),
   OC_MVC_LEAF(+16),OC_MVC_LEAF(-16),OC_MVC_LEAF(+17),OC_MVC_LEAF(-17),
   OC_MVC_LEAF(+18),OC_MVC_LEAF(-18),OC_MVC_LEAF(+19),OC_MVC_LEAF(-19),
   OC_MVC_LEAF(+20),OC_MVC_LEAF(-20),OC_MVC_LEAF(+21),OC_MVC_LEAF(-21),
   OC_MVC_LEAF(+22),OC_MVC_LEAF(-22),OC_MVC_LEAF(+23),OC_MVC_LEAF(-23),
   OC_MVC_LEAF(+24),OC_MVC_LEAF(-24),OC_MVC_LEAF(+25),OC_MVC_LEAF(-25),
   OC_MVC_LEAF(+26),OC_MVC_LEAF(-26),OC_MVC_LEAF(+27),OC_MVC_LEAF(-27),
   OC_MVC_LEAF(+28),OC_MVC_LEAF(-28),OC_MVC_LEAF(+29),OC_MVC_LEAF(-29),
   OC_MVC_LEAF(+30),OC_MVC_LEAF(-30),OC_MVC_LEAF(+31),OC_MVC_LEAF(-31)
};
#undef OC_MVC_LEAF
794
795
796
1.35M
/*Reads a single motion vector from the packet.
  Two tokens are decoded with _tree, the first for the x component and the
   second for the y component; each token carries a bias of 32 that is removed
   here.
  _opb:  The pack buffer to read from.
  _tree: The MV component tree to decode with.
  Return: The two components packed with OC_MV().*/
static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
  int comps[2];
  int ci;
  /*The loop keeps the read order explicit: x first, then y.*/
  for(ci=0;ci<2;ci++)comps[ci]=oc_huff_token_decode(_opb,_tree)-32;
  return OC_MV(comps[0],comps[1]);
}
803
804
/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
805
   block modes and motion vectors to the individual fragments.*/
806
710
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
807
710
  const oc_mb_map        *mb_maps;
808
710
  const signed char      *mb_modes;
809
710
  oc_set_chroma_mvs_func  set_chroma_mvs;
810
710
  const ogg_int16_t      *mv_comp_tree;
811
710
  oc_fragment            *frags;
812
710
  oc_mv                  *frag_mvs;
813
710
  const unsigned char    *map_idxs;
814
710
  int                     map_nidxs;
815
710
  oc_mv                   last_mv;
816
710
  oc_mv                   prior_mv;
817
710
  oc_mv                   cbmvs[4];
818
710
  size_t                  nmbs;
819
710
  size_t                  mbi;
820
710
  long                    val;
821
710
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
822
710
  val=oc_pack_read1(&_dec->opb);
823
710
  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
824
710
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
825
710
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
826
710
  prior_mv=last_mv=0;
827
710
  frags=_dec->state.frags;
828
710
  frag_mvs=_dec->state.frag_mvs;
829
710
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
830
710
  mb_modes=_dec->state.mb_modes;
831
710
  nmbs=_dec->state.nmbs;
832
739k
  for(mbi=0;mbi<nmbs;mbi++){
833
738k
    int mb_mode;
834
738k
    mb_mode=mb_modes[mbi];
835
738k
    if(mb_mode!=OC_MODE_INVALID){
836
714k
      oc_mv     mbmv;
837
714k
      ptrdiff_t fragi;
838
714k
      int       mapi;
839
714k
      int       mapii;
840
714k
      int       refi;
841
714k
      if(mb_mode==OC_MODE_INTER_MV_FOUR){
842
501k
        oc_mv lbmvs[4];
843
501k
        int   bi;
844
501k
        prior_mv=last_mv;
845
2.50M
        for(bi=0;bi<4;bi++){
846
2.00M
          fragi=mb_maps[mbi][0][bi];
847
2.00M
          if(frags[fragi].coded){
848
1.34M
            frags[fragi].refi=OC_FRAME_PREV;
849
1.34M
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
850
1.34M
            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
851
1.34M
            frag_mvs[fragi]=lbmvs[bi];
852
1.34M
          }
853
662k
          else lbmvs[bi]=0;
854
2.00M
        }
855
501k
        (*set_chroma_mvs)(cbmvs,lbmvs);
856
3.05M
        for(mapii=4;mapii<map_nidxs;mapii++){
857
2.54M
          mapi=map_idxs[mapii];
858
2.54M
          bi=mapi&3;
859
2.54M
          fragi=mb_maps[mbi][mapi>>2][bi];
860
2.54M
          if(frags[fragi].coded){
861
1.34M
            frags[fragi].refi=OC_FRAME_PREV;
862
1.34M
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
863
1.34M
            frag_mvs[fragi]=cbmvs[bi];
864
1.34M
          }
865
2.54M
        }
866
501k
      }
867
213k
      else{
868
213k
        switch(mb_mode){
869
7.13k
          case OC_MODE_INTER_MV:{
870
7.13k
            prior_mv=last_mv;
871
7.13k
            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
872
7.13k
          }break;
873
13.9k
          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
874
9.01k
          case OC_MODE_INTER_MV_LAST2:{
875
9.01k
            mbmv=prior_mv;
876
9.01k
            prior_mv=last_mv;
877
9.01k
            last_mv=mbmv;
878
9.01k
          }break;
879
5.07k
          case OC_MODE_GOLDEN_MV:{
880
5.07k
            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
881
5.07k
          }break;
882
178k
          default:mbmv=0;break;
883
213k
        }
884
        /*Fill in the MVs for the fragments.*/
885
213k
        refi=OC_FRAME_FOR_MODE(mb_mode);
886
213k
        mapii=0;
887
1.78M
        do{
888
1.78M
          mapi=map_idxs[mapii];
889
1.78M
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
890
1.78M
          if(frags[fragi].coded){
891
630k
            frags[fragi].refi=refi;
892
630k
            frags[fragi].mb_mode=mb_mode;
893
630k
            frag_mvs[fragi]=mbmv;
894
630k
          }
895
1.78M
        }
896
1.78M
        while(++mapii<map_nidxs);
897
213k
      }
898
714k
    }
899
738k
  }
900
710
}
901
902
1.12k
/*Unpacks the quantizer index selector (qii) of every coded fragment.
  With a single qi in the frame, every coded fragment simply gets qii 0.
  Otherwise up to two run-length coded passes are read: the first marks each
   coded fragment as qii 0 or non-zero, and the second (only with 3 qi values
   and at least one non-zero fragment) adds one more to some of the non-zero
   ones, separating qii 1 from qii 2.
  A run count of 4129 or more is treated as a "full" run: the next flag is
   then read explicitly instead of being toggled.
  NOTE(review): 4129 is presumably the longest run oc_sb_run_unpack() can
   return; confirm against that function.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  const ptrdiff_t *coded_fragis;
  oc_fragment     *frags;
  ptrdiff_t        ncoded;
  ptrdiff_t        ci;
  ptrdiff_t        fragi;
  int              bit;
  int              nnonzero;
  int              run_len;
  int              was_full;
  ncoded=_dec->state.ntotal_coded_fragis;
  if(ncoded<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*Only one qi value in this frame: nothing is coded in the packet.*/
    for(ci=0;ci<ncoded;ci++)frags[coded_fragis[ci]].qii=0;
    return;
  }
  /*First pass: qii is zero or non-zero.
    For now the qii itself is stored in the fragment.*/
  bit=(int)oc_pack_read1(&_dec->opb);
  nnonzero=0;
  ci=0;
  while(ci<ncoded){
    run_len=oc_sb_run_unpack(&_dec->opb);
    was_full=run_len>=4129;
    /*At least one fragment is consumed per run, even for a run of 0.*/
    do{
      frags[coded_fragis[ci]].qii=bit;
      ci++;
      nnonzero+=bit;
    }
    while(--run_len>0&&ci<ncoded);
    if(was_full&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  /*TODO: run_len should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*The second pass is only needed with 3 different qi's and at least one
     fragment with a non-zero qii.*/
  if(_dec->state.nqis!=3||nnonzero<=0)return;
  /*Skip leading qii==0 fragments; nnonzero>0 guarantees this stops.*/
  ci=0;
  while(frags[coded_fragis[ci]].qii==0)ci++;
  bit=(int)oc_pack_read1(&_dec->opb);
  do{
    run_len=oc_sb_run_unpack(&_dec->opb);
    was_full=run_len>=4129;
    /*Runs in this pass only count fragments marked non-zero in pass one.*/
    for(;ci<ncoded;ci++){
      fragi=coded_fragis[ci];
      if(frags[fragi].qii==0)continue;
      if(run_len--<=0)break;
      frags[fragi].qii+=bit;
    }
    if(was_full&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  while(ci<ncoded);
  /*TODO: run_len should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
982
983
984
985
/*Unpacks the DC coefficient tokens.
986
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
987
   the DC coefficient values now so that we can do DC prediction.
988
  _huff_idx:   The index of the Huffman table to use for each color plane.
989
  _ntoks_left: The number of tokens left to be decoded in each color plane for
990
                each coefficient.
991
               This is updated as EOB tokens and zero run tokens are decoded.
992
  Return: The length of any outstanding EOB run.*/
993
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
994
1.12k
 ptrdiff_t _ntoks_left[3][64]){
995
1.12k
  unsigned char   *dct_tokens;
996
1.12k
  oc_fragment     *frags;
997
1.12k
  const ptrdiff_t *coded_fragis;
998
1.12k
  ptrdiff_t        ncoded_fragis;
999
1.12k
  ptrdiff_t        fragii;
1000
1.12k
  ptrdiff_t        eobs;
1001
1.12k
  ptrdiff_t        ti;
1002
1.12k
  int              pli;
1003
1.12k
  dct_tokens=_dec->dct_tokens;
1004
1.12k
  frags=_dec->state.frags;
1005
1.12k
  coded_fragis=_dec->state.coded_fragis;
1006
1.12k
  ncoded_fragis=fragii=eobs=ti=0;
1007
4.51k
  for(pli=0;pli<3;pli++){
1008
3.38k
    ptrdiff_t run_counts[64];
1009
3.38k
    ptrdiff_t eob_count;
1010
3.38k
    ptrdiff_t eobi;
1011
3.38k
    int       rli;
1012
3.38k
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1013
3.38k
    memset(run_counts,0,sizeof(run_counts));
1014
3.38k
    _dec->eob_runs[pli][0]=eobs;
1015
3.38k
    _dec->ti0[pli][0]=ti;
1016
    /*Continue any previous EOB run, if there was one.*/
1017
3.38k
    eobi=eobs;
1018
3.38k
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1019
3.38k
    eob_count=eobi;
1020
3.38k
    eobs-=eobi;
1021
910k
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1022
6.18M
    while(fragii<ncoded_fragis){
1023
6.18M
      int token;
1024
6.18M
      int cw;
1025
6.18M
      int eb;
1026
6.18M
      int skip;
1027
6.18M
      token=oc_huff_token_decode(&_dec->opb,
1028
6.18M
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1029
6.18M
      dct_tokens[ti++]=(unsigned char)token;
1030
6.18M
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1031
2.51M
        eb=(int)oc_pack_read(&_dec->opb,
1032
2.51M
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1033
2.51M
        dct_tokens[ti++]=(unsigned char)eb;
1034
2.51M
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1035
2.51M
        eb<<=OC_DCT_TOKEN_EB_POS(token);
1036
2.51M
      }
1037
3.67M
      else eb=0;
1038
6.18M
      cw=OC_DCT_CODE_WORD[token]+eb;
1039
6.18M
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1040
6.18M
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1041
6.18M
      if(eobs){
1042
764k
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1043
764k
        eob_count+=eobi;
1044
764k
        eobs-=eobi;
1045
3.83M
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1046
764k
      }
1047
5.41M
      else{
1048
5.41M
        int coeff;
1049
5.41M
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1050
5.41M
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1051
5.41M
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1052
5.41M
        if(skip)coeff=0;
1053
5.41M
        run_counts[skip]++;
1054
5.41M
        frags[coded_fragis[fragii++]].dc=coeff;
1055
5.41M
      }
1056
6.18M
    }
1057
    /*Add the total EOB count to the longest run length.*/
1058
3.38k
    run_counts[63]+=eob_count;
1059
    /*And convert the run_counts array to a moment table.*/
1060
216k
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1061
    /*Finally, subtract off the number of coefficients that have been
1062
       accounted for by runs started in this coefficient.*/
1063
219k
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1064
3.38k
  }
1065
1.12k
  _dec->dct_tokens_count=ti;
1066
1.12k
  return eobs;
1067
1.12k
}
1068
1069
/*Unpacks the AC coefficient tokens.
1070
  This can completely discard coefficient values while unpacking, and so is
1071
   somewhat simpler than unpacking the DC coefficient tokens.
1072
  _huff_idx:   The index of the Huffman table to use for each color plane.
1073
  _ntoks_left: The number of tokens left to be decoded in each color plane for
1074
                each coefficient.
1075
               This is updated as EOB tokens and zero run tokens are decoded.
1076
  _eobs:       The length of any outstanding EOB run from previous
1077
                coefficients.
1078
  Return: The length of any outstanding EOB run.*/
1079
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1080
71.0k
 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1081
71.0k
  unsigned char *dct_tokens;
1082
71.0k
  ptrdiff_t      ti;
1083
71.0k
  int            pli;
1084
71.0k
  dct_tokens=_dec->dct_tokens;
1085
71.0k
  ti=_dec->dct_tokens_count;
1086
284k
  for(pli=0;pli<3;pli++){
1087
213k
    ptrdiff_t run_counts[64];
1088
213k
    ptrdiff_t eob_count;
1089
213k
    size_t    ntoks_left;
1090
213k
    size_t    ntoks;
1091
213k
    int       rli;
1092
213k
    _dec->eob_runs[pli][_zzi]=_eobs;
1093
213k
    _dec->ti0[pli][_zzi]=ti;
1094
213k
    ntoks_left=_ntoks_left[pli][_zzi];
1095
213k
    memset(run_counts,0,sizeof(run_counts));
1096
213k
    eob_count=0;
1097
213k
    ntoks=0;
1098
99.5M
    while(ntoks+_eobs<ntoks_left){
1099
99.3M
      int token;
1100
99.3M
      int cw;
1101
99.3M
      int eb;
1102
99.3M
      int skip;
1103
99.3M
      ntoks+=_eobs;
1104
99.3M
      eob_count+=_eobs;
1105
99.3M
      token=oc_huff_token_decode(&_dec->opb,
1106
99.3M
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1107
99.3M
      dct_tokens[ti++]=(unsigned char)token;
1108
99.3M
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1109
49.7M
        eb=(int)oc_pack_read(&_dec->opb,
1110
49.7M
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1111
49.7M
        dct_tokens[ti++]=(unsigned char)eb;
1112
49.7M
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1113
49.7M
        eb<<=OC_DCT_TOKEN_EB_POS(token);
1114
49.7M
      }
1115
49.5M
      else eb=0;
1116
99.3M
      cw=OC_DCT_CODE_WORD[token]+eb;
1117
99.3M
      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1118
99.3M
      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1119
99.3M
      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1120
99.3M
      if(_eobs==0){
1121
98.8M
        run_counts[skip]++;
1122
98.8M
        ntoks++;
1123
98.8M
      }
1124
99.3M
    }
1125
    /*Add the portion of the last EOB run actually used by this coefficient.*/
1126
213k
    eob_count+=ntoks_left-ntoks;
1127
    /*And remove it from the remaining EOB count.*/
1128
213k
    _eobs-=ntoks_left-ntoks;
1129
    /*Add the total EOB count to the longest run length.*/
1130
213k
    run_counts[63]+=eob_count;
1131
    /*And convert the run_counts array to a moment table.*/
1132
13.6M
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1133
    /*Finally, subtract off the number of coefficients that have been
1134
       accounted for by runs started in this coefficient.*/
1135
7.03M
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1136
213k
  }
1137
71.0k
  _dec->dct_tokens_count=ti;
1138
71.0k
  return _eobs;
1139
71.0k
}
1140
1141
/*Tokens describing the DCT coefficients that belong to each fragment are
1142
   stored in the bitstream grouped by coefficient, not by fragment.
1143
1144
  This means that we either decode all the tokens in order, building up a
1145
   separate coefficient list for each fragment as we go, and then go back and
1146
   do the iDCT on each fragment, or we have to create separate lists of tokens
1147
   for each coefficient, so that we can pull the next token required off the
1148
   head of the appropriate list when decoding a specific fragment.
1149
1150
  The former was VP3's choice, and it meant 2*w*h extra storage for all the
1151
   decoded coefficient values.
1152
1153
  We take the second option, which lets us store just one to three bytes per
1154
   token (generally far fewer than the number of coefficients, due to EOB
1155
   tokens and zero runs), and which requires us to only maintain a counter for
1156
   each of the 64 coefficients, instead of a counter for every fragment to
1157
   determine where the next token goes.
1158
1159
  We actually use 3 counters per coefficient, one for each color plane, so we
1160
   can decode all color planes simultaneously.
1161
  This lets color conversion, etc., be done as soon as a full MCU (one or
1162
   two super block rows) is decoded, while the image data is still in cache.*/
1163
1164
1.12k
/*Unpacks all DCT tokens of the frame into the per-coefficient token lists.
  Two 4-bit Huffman table indices (luma, chroma) are read for the DC
   coefficient, followed by the DC tokens, then two more indices for the AC
   coefficients, followed by the tokens of coefficients 1...63 in order.
  The AC coefficients are split into four groups that start at coefficients
   1, 6, 15 and 28; the table indices are advanced by 16 at the start of each
   group.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t  ntoks_left[3][64];
  ptrdiff_t  eobs;
  int        huff_idxs[2];
  int        pli;
  int        zzi;
  int        hgi;
  int        hi;
  /*Initially every coded fragment of a plane needs a token for every
     coefficient.*/
  for(pli=0;pli<3;pli++){
    for(zzi=0;zzi<64;zzi++){
      ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
    }
  }
  for(hi=0;hi<2;hi++)huff_idxs[hi]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  for(hi=0;hi<2;hi++)huff_idxs[hi]=(int)oc_pack_read(&_dec->opb,4);
  /*hgi counts the group boundaries passed so far; OC_HUFF_LIST_MAX[hgi] is
     the first coefficient of the next group.
    The final entry (64) is never reached, so hgi stays in range.*/
  hgi=0;
  for(zzi=1;zzi<64;zzi++){
    if(zzi==OC_HUFF_LIST_MAX[hgi]){
      huff_idxs[0]+=16;
      huff_idxs[1]+=16;
      hgi++;
    }
    eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1202
1203
1204
1.12k
/*Prepares (or releases) the buffers used for post-processing, according to
   the current _dec->pp_level.
  The buffers are allocated lazily: dc_qis first (only on an INTRA frame), then
   variances and pp_frame_data once the level asks for more than tracking DC
   quantization indices.
  Return: 0 if post-processing can be applied to this frame, or 1 if it is
           disabled, not yet possible, or an allocation failed.*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  size_t y_sz;
  size_t c_sz;
  int    c_w;
  int    c_h;
  int    chroma_pp;
  int    pli;
  /*musl libc malloc()/realloc() calls might use floating point, so make sure
     we've cleared the MMX state for them.*/
  oc_restore_fpu(&_dec->state);
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    /*dc_qis is always the first buffer allocated, so nothing else can be
       live when it is NULL.*/
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    const ptrdiff_t *coded_fragis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Update the DC quantization index of each coded block.*/
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      _dec->dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  /*Plane sizes; chroma is halved in each direction the pixel format
     sub-samples.*/
  y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
  c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
  c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
  c_sz=c_w*(size_t)c_h;
  chroma_pp=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  if(_dec->variances==NULL){
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     (y_sz+(c_sz<<1))*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*Update the PP buffer pointers if necessary.
    pp_frame_state is 1 when only luma is set up and 2 when all three planes
     are.*/
  if(_dec->pp_frame_state!=1+chroma_pp){
    _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
    _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
    if(!chroma_pp){
      /*If chroma processing is disabled, just use the PP luma plane.
        The plane is set up with a negative stride, so data points at the last
         row of the buffer.*/
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      /*Otherwise, set up pointers to all three PP planes, laid out back to
         back, and flip them afterwards.*/
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      for(pli=1;pli<3;pli++){
        _dec->pp_frame_buf[pli].width=c_w;
        _dec->pp_frame_buf[pli].height=c_h;
        _dec->pp_frame_buf[pli].stride=_dec->pp_frame_buf[pli].width;
        _dec->pp_frame_buf[pli].data=
         _dec->pp_frame_buf[pli-1].data+(pli==1?y_sz:c_sz);
      }
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=1+chroma_pp;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(!chroma_pp){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1326
1327
1328
/*Initialize the main decoding pipeline.*/
1329
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1330
1.12k
 oc_dec_pipeline_state *_pipe){
1331
1.12k
  const ptrdiff_t *coded_fragis;
1332
1.12k
  const ptrdiff_t *uncoded_fragis;
1333
1.12k
  int              flimit;
1334
1.12k
  int              pli;
1335
1.12k
  int              qii;
1336
1.12k
  int              qti;
1337
1.12k
  int              zzi;
1338
  /*If chroma is sub-sampled in the vertical direction, we have to decode two
1339
     super block rows of Y' for each super block row of Cb and Cr.*/
1340
1.12k
  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1341
  /*Initialize the token and extra bits indices for each plane and
1342
     coefficient.*/
1343
1.12k
  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1344
  /*Also copy over the initial the EOB run counts.*/
1345
1.12k
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1346
  /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1347
1.12k
  coded_fragis=_dec->state.coded_fragis;
1348
1.12k
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
1349
4.51k
  for(pli=0;pli<3;pli++){
1350
3.38k
    ptrdiff_t ncoded_fragis;
1351
3.38k
    _pipe->coded_fragis[pli]=coded_fragis;
1352
3.38k
    _pipe->uncoded_fragis[pli]=uncoded_fragis;
1353
3.38k
    ncoded_fragis=_dec->state.ncoded_fragis[pli];
1354
3.38k
    coded_fragis+=ncoded_fragis;
1355
3.38k
    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1356
3.38k
  }
1357
  /*Set up condensed quantizer tables.*/
1358
4.51k
  for(pli=0;pli<3;pli++){
1359
7.96k
    for(qii=0;qii<_dec->state.nqis;qii++){
1360
13.7k
      for(qti=0;qti<2;qti++){
1361
9.16k
        _pipe->dequant[pli][qii][qti]=
1362
9.16k
         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1363
9.16k
      }
1364
4.58k
    }
1365
3.38k
  }
1366
  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1367
1.12k
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1368
  /*Initialize the bounding value array for the loop filter.*/
1369
1.12k
  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1370
1.12k
  _pipe->loop_filter=flimit!=0;
1371
1.12k
  if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1372
  /*Initialize any buffers needed for post-processing.
1373
    We also save the current post-processing level, to guard against the user
1374
     changing it from a callback.*/
1375
1.12k
  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1376
  /*If we don't have enough information to post-process, disable it, regardless
1377
     of the user-requested level.*/
1378
1.12k
  else{
1379
1.12k
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1380
1.12k
    memcpy(_dec->pp_frame_buf,
1381
1.12k
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1382
1.12k
     sizeof(_dec->pp_frame_buf[0])*3);
1383
1.12k
  }
1384
  /*Clear down the DCT coefficient buffer for the first block.*/
1385
73.3k
  for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1386
1.12k
}
1387
1388
/*Undo the DC prediction in a single plane of an MCU (one or two super block
1389
   rows).
1390
  As a side effect, the number of coded and uncoded fragments in this plane of
1391
   the MCU is also computed.*/
1392
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1393
42.9k
 oc_dec_pipeline_state *_pipe,int _pli){
1394
42.9k
  const oc_fragment_plane *fplane;
1395
42.9k
  oc_fragment             *frags;
1396
42.9k
  int                     *pred_last;
1397
42.9k
  ptrdiff_t                ncoded_fragis;
1398
42.9k
  ptrdiff_t                fragi;
1399
42.9k
  int                      fragx;
1400
42.9k
  int                      fragy;
1401
42.9k
  int                      fragy0;
1402
42.9k
  int                      fragy_end;
1403
42.9k
  int                      nhfrags;
1404
  /*Compute the first and last fragment row of the current MCU for this
1405
     plane.*/
1406
42.9k
  fplane=_dec->state.fplanes+_pli;
1407
42.9k
  fragy0=_pipe->fragy0[_pli];
1408
42.9k
  fragy_end=_pipe->fragy_end[_pli];
1409
42.9k
  nhfrags=fplane->nhfrags;
1410
42.9k
  pred_last=_pipe->pred_last[_pli];
1411
42.9k
  frags=_dec->state.frags;
1412
42.9k
  ncoded_fragis=0;
1413
42.9k
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1414
222k
  for(fragy=fragy0;fragy<fragy_end;fragy++){
1415
179k
    if(fragy==0){
1416
      /*For the first row, all of the cases reduce to just using the previous
1417
         predictor for the same reference frame.*/
1418
158k
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1419
154k
        if(frags[fragi].coded){
1420
115k
          int refi;
1421
115k
          refi=frags[fragi].refi;
1422
115k
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1423
115k
          ncoded_fragis++;
1424
115k
        }
1425
154k
      }
1426
3.38k
    }
1427
176k
    else{
1428
176k
      oc_fragment *u_frags;
1429
176k
      int          l_ref;
1430
176k
      int          ul_ref;
1431
176k
      int          u_ref;
1432
176k
      u_frags=frags-nhfrags;
1433
176k
      l_ref=-1;
1434
176k
      ul_ref=-1;
1435
176k
      u_ref=u_frags[fragi].refi;
1436
12.4M
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1437
12.2M
        int ur_ref;
1438
12.2M
        if(fragx+1>=nhfrags)ur_ref=-1;
1439
12.0M
        else ur_ref=u_frags[fragi+1].refi;
1440
12.2M
        if(frags[fragi].coded){
1441
9.28M
          int pred;
1442
9.28M
          int refi;
1443
9.28M
          refi=frags[fragi].refi;
1444
          /*We break out a separate case based on which of our neighbors use
1445
             the same reference frames.
1446
            This is somewhat faster than trying to make a generic case which
1447
             handles all of them, since it reduces lots of poorly predicted
1448
             jumps to one switch statement, and also lets a number of the
1449
             multiplications be optimized out by strength reduction.*/
1450
9.28M
          switch((l_ref==refi)|(ul_ref==refi)<<1|
1451
9.28M
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
1452
54.8k
            default:pred=pred_last[refi];break;
1453
70.9k
            case  1:
1454
108k
            case  3:pred=frags[fragi-1].dc;break;
1455
120k
            case  2:pred=u_frags[fragi-1].dc;break;
1456
5.87k
            case  4:
1457
12.3k
            case  6:
1458
136k
            case 12:pred=u_frags[fragi].dc;break;
1459
57.5k
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1460
209k
            case  8:pred=u_frags[fragi+1].dc;break;
1461
24.4k
            case  9:
1462
225k
            case 11:
1463
309k
            case 13:{
1464
              /*The TI compiler mis-compiles this line.*/
1465
309k
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1466
309k
            }break;
1467
863k
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1468
231k
            case 14:{
1469
231k
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1470
231k
               +10*u_frags[fragi].dc)/16;
1471
231k
            }break;
1472
297k
            case  7:
1473
7.18M
            case 15:{
1474
7.18M
              int p0;
1475
7.18M
              int p1;
1476
7.18M
              int p2;
1477
7.18M
              p0=frags[fragi-1].dc;
1478
7.18M
              p1=u_frags[fragi-1].dc;
1479
7.18M
              p2=u_frags[fragi].dc;
1480
7.18M
              pred=(29*(p0+p2)-26*p1)/32;
1481
7.18M
              if(abs(pred-p2)>128)pred=p2;
1482
7.08M
              else if(abs(pred-p0)>128)pred=p0;
1483
7.01M
              else if(abs(pred-p1)>128)pred=p1;
1484
7.18M
            }break;
1485
9.28M
          }
1486
9.28M
          pred_last[refi]=frags[fragi].dc+=pred;
1487
9.28M
          ncoded_fragis++;
1488
9.28M
          l_ref=refi;
1489
9.28M
        }
1490
2.98M
        else l_ref=-1;
1491
12.2M
        ul_ref=u_ref;
1492
12.2M
        u_ref=ur_ref;
1493
12.2M
      }
1494
176k
    }
1495
179k
  }
1496
42.9k
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1497
  /*Also save the number of uncoded fragments so we know how many to copy.*/
1498
42.9k
  _pipe->nuncoded_fragis[_pli]=
1499
42.9k
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1500
42.9k
}
1501
1502
/*Reconstructs all coded fragments in a single MCU (one or two super block
1503
   rows).
1504
  This requires that each coded fragment have a proper macro block mode and
1505
   motion vector (if not in INTRA mode), and have its DC value decoded, with
1506
   the DC prediction process reversed, and the number of coded and uncoded
1507
   fragments in this plane of the MCU be counted.
1508
  The token lists for each color plane and coefficient should also be filled
1509
   in, along with initial token offsets, extra bits offsets, and EOB run
1510
   counts.*/
1511
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1512
42.9k
 oc_dec_pipeline_state *_pipe,int _pli){
1513
42.9k
  unsigned char       *dct_tokens;
1514
42.9k
  const unsigned char *dct_fzig_zag;
1515
42.9k
  ogg_uint16_t         dc_quant[2];
1516
42.9k
  const oc_fragment   *frags;
1517
42.9k
  const ptrdiff_t     *coded_fragis;
1518
42.9k
  ptrdiff_t            ncoded_fragis;
1519
42.9k
  ptrdiff_t            fragii;
1520
42.9k
  ptrdiff_t           *ti;
1521
42.9k
  ptrdiff_t           *eob_runs;
1522
42.9k
  int                  qti;
1523
42.9k
  dct_tokens=_dec->dct_tokens;
1524
42.9k
  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1525
42.9k
  frags=_dec->state.frags;
1526
42.9k
  coded_fragis=_pipe->coded_fragis[_pli];
1527
42.9k
  ncoded_fragis=_pipe->ncoded_fragis[_pli];
1528
42.9k
  ti=_pipe->ti[_pli];
1529
42.9k
  eob_runs=_pipe->eob_runs[_pli];
1530
128k
  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1531
9.43M
  for(fragii=0;fragii<ncoded_fragis;fragii++){
1532
9.39M
    const ogg_uint16_t *ac_quant;
1533
9.39M
    ptrdiff_t           fragi;
1534
9.39M
    int                 last_zzi;
1535
9.39M
    int                 zzi;
1536
9.39M
    fragi=coded_fragis[fragii];
1537
9.39M
    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1538
9.39M
    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1539
    /*Decode the AC coefficients.*/
1540
114M
    for(zzi=0;zzi<64;){
1541
113M
      int token;
1542
113M
      last_zzi=zzi;
1543
113M
      if(eob_runs[zzi]){
1544
7.67M
        eob_runs[zzi]--;
1545
7.67M
        break;
1546
7.67M
      }
1547
105M
      else{
1548
105M
        ptrdiff_t eob;
1549
105M
        int       cw;
1550
105M
        int       rlen;
1551
105M
        int       coeff;
1552
105M
        int       lti;
1553
105M
        lti=ti[zzi];
1554
105M
        token=dct_tokens[lti++];
1555
105M
        cw=OC_DCT_CODE_WORD[token];
1556
        /*These parts could be done branchless, but the branches are fairly
1557
           predictable and the C code translates into more than a few
1558
           instructions, so it's worth it to avoid them.*/
1559
105M
        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1560
52.3M
          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1561
52.3M
        }
1562
105M
        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1563
105M
        if(token==OC_DCT_TOKEN_FAT_EOB){
1564
998
          eob+=dct_tokens[lti++]<<8;
1565
998
          if(eob==0)eob=OC_DCT_EOB_FINISH;
1566
998
        }
1567
105M
        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1568
105M
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1569
105M
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1570
105M
        eob_runs[zzi]=eob;
1571
105M
        ti[zzi]=lti;
1572
105M
        zzi+=rlen;
1573
105M
        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1574
105M
         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1575
105M
        zzi+=!eob;
1576
105M
      }
1577
113M
    }
1578
    /*TODO: zzi should be exactly 64 here.
1579
      If it's not, we should report some kind of warning.*/
1580
9.39M
    zzi=OC_MINI(zzi,64);
1581
9.39M
    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1582
    /*last_zzi is always initialized.
1583
      If your compiler thinks otherwise, it is dumb.*/
1584
9.39M
    oc_state_frag_recon(&_dec->state,fragi,_pli,
1585
9.39M
     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1586
9.39M
  }
1587
42.9k
  _pipe->coded_fragis[_pli]+=ncoded_fragis;
1588
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
1589
  /*TODO: We make the decision here to always copy the uncoded blocks into it
1590
     from the reference frame.
1591
    We could also copy the coded blocks back over the reference frame, if we
1592
     wait for an additional MCU to be decoded, which might be faster if only a
1593
     small number of blocks are coded.
1594
    However, this introduces more latency, creating a larger cache footprint.
1595
    It's unknown which decision is better, but this one results in simpler
1596
     code, and the hard case (high bitrate, high resolution) is handled
1597
     correctly.*/
1598
  /*Copy the uncoded blocks from the previous reference frame.*/
1599
42.9k
  if(_pipe->nuncoded_fragis[_pli]>0){
1600
22.6k
    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1601
22.6k
    oc_frag_copy_list(&_dec->state,
1602
22.6k
     _dec->state.ref_frame_data[OC_FRAME_SELF],
1603
22.6k
     _dec->state.ref_frame_data[OC_FRAME_PREV],
1604
22.6k
     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1605
22.6k
     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1606
22.6k
  }
1607
42.9k
}
1608
1609
/*Filter a horizontal block edge.
  For each of 8 adjacent columns, 10 vertically consecutive pixels are read
   from _src and 8 pixels are written to _dst.
  The 8 outputs correspond to input rows 1 through 8; they are low-pass
   filtered only when both halves of the column are smooth and the step
   across the middle (rows 4 and 5) is small, and copied unchanged otherwise.
  _dst:         The first of the 8 output rows.
  _dst_ystride: The distance between output rows.
  _src:         The first of the 10 input rows.
  _src_ystride: The distance between input rows.
  _qstep:       The (exclusive) limit on the step between rows 4 and 5.
  _flimit:      The (exclusive) limit on the activity of each half.
  _variance0:   Accumulates the activity of rows 0...4, at most 255 per
                 column.
  _variance1:   Accumulates the activity of rows 5...9, at most 255 per
                 column.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  q[8];
    int                  act0;
    int                  act1;
    int                  i;
    in=_src+col;
    out=_dst+col;
    /*Gather the whole input column before anything is written.*/
    for(i=0;i<10;i++)p[i]=in[i*_src_ystride];
    /*Sum of absolute differences within each half of the column.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=act0<255?act0:255;
    *_variance1+=act1<255?act1:255;
    if(act0<_flimit&&act1<_flimit
     &&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      /*The two outputs nearest each end use shortened, end-weighted taps.*/
      q[0]=p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3;
      q[1]=p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3;
      for(i=0;i<4;i++){
        q[i+2]=p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4>>3;
      }
      q[6]=p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3;
      q[7]=p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3;
    }
    else{
      /*Not smooth enough: pass rows 1...8 through unchanged.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    for(i=0;i<8;i++)out[i*_dst_ystride]=(unsigned char)q[i];
  }
}
1662
1663
/*Filter a vertical block edge, in place.
  For each of 8 rows, the 10 horizontally consecutive pixels starting one to
   the left of _dst are read, and, when both halves are smooth and the step
   across the middle is small, the 8 pixels starting at _dst are replaced by
   low-pass filtered values.
  Rows that fail the test are left untouched.
  _dst:         The second pixel of the first 10-pixel row.
  _dst_ystride: The distance between rows.
  _qstep:       The (exclusive) limit on the step between pixels 4 and 5.
  _flimit:      The (exclusive) limit on the activity of each half.
  _variances:   _variances[0] accumulates the activity of pixels 0...4 and
                 _variances[1] that of pixels 5...9, at most 255 per row.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  int row;
  for(row=0;row<8;row++){
    unsigned char *line;
    int            p[10];
    int            act0;
    int            act1;
    int            i;
    line=_dst+row*_dst_ystride;
    /*Gather the whole row, including one pixel on either side of the 8 that
       may be rewritten.*/
    for(i=0;i<10;i++)p[i]=line[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=act0<255?act0:255;
    _variances[1]+=act1<255?act1:255;
    if(act0>=_flimit||act1>=_flimit)continue;
    if(p[5]-p[4]>=_qstep||p[4]-p[5]>=_qstep)continue;
    /*The two outputs nearest each end use shortened, end-weighted taps.*/
    line[0]=(unsigned char)(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3);
    line[1]=(unsigned char)(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3);
    for(i=0;i<4;i++){
      line[i+2]=(unsigned char)(p[i]+p[i+1]+p[i+2]+p[i+3]*2+
       p[i+4]+p[i+5]+p[i+6]+4>>3);
    }
    line[6]=(unsigned char)(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3);
    line[7]=(unsigned char)(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3);
  }
}
1699
1700
/*Deblocks a range of fragment rows of one plane, reading from _src and
   writing to _dst, and accumulates the per-fragment variances consumed by
   the deringing stage.
  Horizontal edges are filtered half a fragment row behind the requested
   range (except at the top of the plane), and the last fragment row is only
   completed when the range reaches the bottom of the plane.
  _dec:       The decoder context (variances, DC qi's, DC scale table).
  _dst:       The array of output image planes.
  _src:       The array of input image planes.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  th_img_plane        *out_plane;
  th_img_plane        *in_plane;
  oc_fragment_plane   *fplane;
  int                 *var;
  unsigned char       *qi_p;
  unsigned char       *out;
  const unsigned char *in;
  ptrdiff_t            first_fragi;
  int                  out_stride;
  int                  in_stride;
  int                  nhfrags;
  int                  width;
  int                  has_prev;
  int                  has_next;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  out_plane=_dst+_pli;
  in_plane=_src+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  first_fragi=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  var=_dec->variances+first_fragi;
  qi_p=_dec->dc_qis+first_fragi;
  has_prev=_fragy0>0;
  has_next=_fragy_end<fplane->nvfrags;
  /*Clear the variances of the rows we are about to accumulate into: skip the
     first row unless we start at the top, and take one extra row unless we
     finish at the bottom.*/
  memset(var+(nhfrags&-has_prev),0,
   (_fragy_end+has_next-_fragy0-has_prev)*(nhfrags*sizeof(var[0])));
  out_stride=out_plane->stride;
  in_stride=in_plane->stride;
  width=out_plane->width;
  /*Except when starting at the top, begin in the middle of the fragment
     row.*/
  y=(_fragy0<<3)+(has_prev<<2);
  out=out_plane->data+y*(ptrdiff_t)out_stride;
  in=in_plane->data+y*(ptrdiff_t)in_stride;
  /*The first four rows of the plane lie above any filtered edge; copy
     them.*/
  while(y<4){
    memcpy(out,in,width*sizeof(out[0]));
    out+=out_stride;
    in+=in_stride;
    y++;
  }
  /*The last fragment row of the plane is excluded from this loop.*/
  y_end=(_fragy_end-!has_next)<<3;
  while(y<y_end){
    x=0;
    do{
      qstep=_dec->pp_dc_scale[*qi_p];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(out+x,out_stride,in+x-in_stride,in_stride,
       qstep,flimit,var,var+nhfrags);
      /*Every fragment but the first in a row also has a vertical edge on its
         left, in the rows the horizontal filter has already finished.*/
      if(x>0){
        oc_filter_vedge(out+x-(out_stride*4)-4,out_stride,
         qstep,flimit,var-1);
      }
      var++;
      qi_p++;
      x+=8;
    }
    while(x<width);
    out+=out_stride*8;
    in+=in_stride*8;
    y+=8;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!has_next){
    int height;
    height=out_plane->height;
    while(y<height){
      memcpy(out,in,width*sizeof(out[0]));
      out+=out_stride;
      in+=in_stride;
      y++;
    }
    /*Filter the last row of vertical block edges.*/
    qi_p++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*qi_p++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(out+x-(out_stride*8)-4,out_stride,
       qstep,flimit,var++);
    }
  }
}
1785
1786
/*Computes the weight given to one neighbor when deringing.
  _p0,_p1:    The two adjacent pixel values.
  _dc_scale:  The DC scale of the block.
  _shift:     How far to scale up the pixel difference.
  _sharp_mod: The value to use when the difference is very large.
  _mod_hi:    The upper bound on the returned weight otherwise.*/
static int oc_dering_edge_mod(int _p0,int _p1,int _dc_scale,int _shift,
 int _sharp_mod,int _mod_hi){
  int mod;
  mod=32+_dc_scale-(abs(_p0-_p1)<<_shift);
  return mod<-64?_sharp_mod:OC_CLAMPI(0,mod,_mod_hi);
}

/*Derings a single 8x8 block, in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors, with the weight of each neighbor derived from how much it
   differs from the pixel.
  _idata:     The top-left pixel of the block.
  _ystride:   The distance between rows.
  _b:         Flags marking block sides for which the neighbor outside the
               block is not read (the edge pixel is used instead): 1 for
               left, 2 for right, 4 for top, 8 for bottom.
  _dc_scale:  The DC scale of the block.
  _sharp_mod: The weight used across very large differences.
  _strong:    Selects between the two entries of the strength tables.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  const unsigned char *left;
  const unsigned char *right;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  row;
  int                  col;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  shift=OC_MOD_SHIFT[_strong];
  /*Weights across the 9 horizontal boundaries (above, between and below the
     8 rows), stored row-major.*/
  cur=_idata;
  above=cur-(_ystride&-!(_b&4));
  for(row=0;row<9;row++){
    for(col=0;col<8;col++){
      vmod[(row<<3)+col]=oc_dering_edge_mod(cur[col],above[col],
       _dc_scale,shift,_sharp_mod,mod_hi);
    }
    above=cur;
    cur+=_ystride&-(!(_b&8)|row<7);
  }
  /*Weights across the 9 vertical boundaries, stored column-major.*/
  right=_idata;
  left=_idata-!(_b&1);
  for(col=0;col<9;col++){
    for(row=0;row<8;row++){
      hmod[(col<<3)+row]=oc_dering_edge_mod(right[row*_ystride],
       left[row*_ystride],_dc_scale,shift,_sharp_mod,mod_hi);
    }
    left=right;
    right+=!(_b&2)|col<7;
  }
  /*Apply the filter.
    This runs in place, so the left and upper neighbors read here have
     already been filtered.*/
  out=_idata;
  cur=_idata;
  above=cur-(_ystride&-!(_b&4));
  below=cur+_ystride;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      int lx;
      int rx;
      int a;
      int b;
      int w;
      /*At a flagged side, the horizontal neighbor is the pixel itself.*/
      lx=col>0?col-1:-!(_b&1);
      rx=col<7?col+1:7+!(_b&2);
      a=128;
      b=64;
      w=hmod[(col<<3)+row];
      a-=w;
      b+=w*cur[lx];
      w=vmod[(row<<3)+col];
      a-=w;
      b+=w*above[col];
      w=vmod[(row+1<<3)+col];
      a-=w;
      b+=w*below[col];
      w=hmod[(col+1<<3)+row];
      a-=w;
      b+=w*cur[rx];
      out[col]=OC_CLAMP255(a*cur[col]+b>>7);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    below+=_ystride&-(!(_b&8)|row<6);
  }
}
1886
1887
0
/*Variance thresholds used by oc_dec_dering_frag_rows() to decide how much
   deringing a block receives.
  A block is left alone at or below THRESH1, gets one weak pass above
   THRESH1, and one strong pass above THRESH2.
  When stronger deringing is enabled, THRESH3 (luma) and THRESH4 (chroma)
   gate the extra-strength path, and THRESH4 is also the limit tested against
   the neighboring blocks' variances.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (OC_DERING_THRESH1*4)
#define OC_DERING_THRESH3 (OC_DERING_THRESH1*5)
#define OC_DERING_THRESH4 (OC_DERING_THRESH1*10)
1891
1892
/*Derings a range of fragment rows of one plane, in place.
  The number and strength of the passes applied to each block is chosen from
   the variance recorded for that block (and, in the strongest mode, its
   neighbors).
  _dec:       The decoder context (variances, fragments, scale tables).
  _img:       The array of image planes to filter.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *plane;
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *var_p;
  unsigned char     *row;
  ptrdiff_t          first_fragi;
  int                ystride;
  int                nhfrags;
  int                sthresh;
  int                strong;
  int                y_end;
  int                width;
  int                height;
  int                y;
  int                x;
  plane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  first_fragi=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  var_p=_dec->variances+first_fragi;
  frag=_dec->state.frags+first_fragi;
  /*Luma and chroma have separate post-processing levels and thresholds for
     the stronger filter.*/
  if(_pli){
    strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGC;
    sthresh=OC_DERING_THRESH4;
  }
  else{
    strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGY;
    sthresh=OC_DERING_THRESH3;
  }
  ystride=plane->stride;
  width=plane->width;
  height=plane->height;
  y=_fragy0<<3;
  y_end=_fragy_end<<3;
  row=plane->data+y*(ptrdiff_t)ystride;
  while(y<y_end){
    for(x=0;x<width;x+=8){
      int edges;
      int qi;
      int var;
      int npasses;
      int strength;
      qi=_dec->state.qis[frag->qii];
      var=*var_p;
      /*Flag the sides of this block that lie on the plane boundary.*/
      edges=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
      npasses=0;
      strength=1;
      if(strong&&var>sthresh){
        npasses=1;
        /*Chroma blocks, and luma blocks with a very busy neighbor, get two
           more passes.
          Neighbors across a plane boundary are never examined.*/
        if(_pli||!(edges&1)&&*(var_p-1)>OC_DERING_THRESH4||
         !(edges&2)&&var_p[1]>OC_DERING_THRESH4||
         !(edges&4)&&*(var_p-nhfrags)>OC_DERING_THRESH4||
         !(edges&8)&&var_p[nhfrags]>OC_DERING_THRESH4){
          npasses=3;
        }
      }
      else if(var>OC_DERING_THRESH2)npasses=1;
      else if(var>OC_DERING_THRESH1){
        npasses=1;
        strength=0;
      }
      while(npasses-->0){
        oc_dering_block(row+x,ystride,edges,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],strength);
      }
      frag++;
      var_p++;
    }
    row+=ystride*8;
    y+=8;
  }
}
1958
1959
1960
1961
1.08k
th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1962
1.08k
  oc_dec_ctx *dec;
1963
1.08k
  if(_info==NULL||_setup==NULL)return NULL;
1964
1.08k
  dec=oc_aligned_malloc(sizeof(*dec),16);
1965
1.08k
  if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1966
6
    oc_aligned_free(dec);
1967
6
    return NULL;
1968
6
  }
1969
1.08k
  dec->state.curframe_num=0;
1970
1.08k
  return dec;
1971
1.08k
}
1972
1973
1.08k
/*Clears and releases a decoder context.
  _dec: The decoder to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1979
1980
/*Decoder control function.
  _dec:    The decoder context.
  _req:    The control code to process.
  _buf:    The parameters for this control code.
  _buf_sz: The size of the parameter buffer; it must match the size of the
            type the control code expects exactly.
  Return: 0 on success.
          TH_EFAULT if _dec or _buf is NULL.
          TH_EINVAL if _buf_sz is wrong or a parameter is out of range.
          TH_EIMPL  if _req is not a supported control code.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    _dec->state.granpos=granpos;
    /*The bits above the granule shift hold the key frame number...*/
    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
     -_dec->state.granpos_bias;
    /*...and the bits below it hold the offset from that key frame.
      The mask is built in 64 bits: shifting an int 1 left by 31 is undefined,
       and granpos itself is 64 bits wide.*/
    _dec->state.curframe_num=_dec->state.keyframe_num
     +(granpos&(((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1));
    return 0;
  }break;
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  /*The telemetry controls each store an int of flags that is tested when the
     telemetry overlay is rendered.*/
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2049
2050
/*We're decoding an INTER frame, but have no initialized reference
2051
   buffers (i.e., decoding did not start on a key frame).
2052
  We initialize them to a solid gray here.*/
2053
472
static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2054
472
  th_info   *info;
2055
472
  size_t     yplane_sz;
2056
472
  size_t     cplane_sz;
2057
472
  ptrdiff_t  yoffset;
2058
472
  int        yhstride;
2059
472
  int        yheight;
2060
472
  int        chstride;
2061
472
  int        cheight;
2062
472
  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2063
472
  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2064
472
  _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2065
472
  _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2066
472
   _dec->state.ref_frame_data[OC_FRAME_PREV]=
2067
472
   _dec->state.ref_frame_data[OC_FRAME_SELF]=
2068
472
   _dec->state.ref_frame_bufs[0][0].data;
2069
472
  memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2070
472
   sizeof(_dec->pp_frame_buf[0])*3);
2071
472
  info=&_dec->state.info;
2072
472
  yhstride=abs(_dec->state.ref_ystride[0]);
2073
472
  yheight=info->frame_height+2*OC_UMV_PADDING;
2074
472
  chstride=abs(_dec->state.ref_ystride[1]);
2075
472
  cheight=yheight>>!(info->pixel_fmt&2);
2076
472
  yplane_sz=yhstride*(size_t)yheight+16;
2077
472
  cplane_sz=chstride*(size_t)cheight;
2078
472
  yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2079
472
  memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2080
472
}
2081
2082
#if defined(HAVE_CAIRO)
2083
static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2084
 int _telemetry){
2085
  /*Stuff the plane into cairo.*/
2086
  cairo_surface_t *cs;
2087
  unsigned char   *data;
2088
  unsigned char   *y_row;
2089
  unsigned char   *u_row;
2090
  unsigned char   *v_row;
2091
  unsigned char   *rgb_row;
2092
  int              cstride;
2093
  int              w;
2094
  int              h;
2095
  int              x;
2096
  int              y;
2097
  int              hdec;
2098
  int              vdec;
2099
  w=_ycbcr[0].width;
2100
  h=_ycbcr[0].height;
2101
  hdec=!(_dec->state.info.pixel_fmt&1);
2102
  vdec=!(_dec->state.info.pixel_fmt&2);
2103
  /*Lazy data buffer init.
2104
    We could try to reuse the post-processing buffer, which would save
2105
     memory, but complicate the allocation logic there.
2106
    I don't think anyone cares about memory usage when using telemetry; it is
2107
     not meant for embedded devices.*/
2108
  if(_dec->telemetry_frame_data==NULL){
2109
    _dec->telemetry_frame_data=_ogg_malloc(
2110
     (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2111
    if(_dec->telemetry_frame_data==NULL)return;
2112
  }
2113
  cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2114
  /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2115
  data=cairo_image_surface_get_data(cs);
2116
  if(data==NULL){
2117
    cairo_surface_destroy(cs);
2118
    return;
2119
  }
2120
  cstride=cairo_image_surface_get_stride(cs);
2121
  y_row=_ycbcr[0].data;
2122
  u_row=_ycbcr[1].data;
2123
  v_row=_ycbcr[2].data;
2124
  rgb_row=data;
2125
  for(y=0;y<h;y++){
2126
    for(x=0;x<w;x++){
2127
      int r;
2128
      int g;
2129
      int b;
2130
      r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2131
      g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2132
       -2672387*v_row[x>>hdec]+447306710)/3287200;
2133
      b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2134
      rgb_row[4*x+0]=OC_CLAMP255(b);
2135
      rgb_row[4*x+1]=OC_CLAMP255(g);
2136
      rgb_row[4*x+2]=OC_CLAMP255(r);
2137
    }
2138
    y_row+=_ycbcr[0].stride;
2139
    u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2140
    v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2141
    rgb_row+=cstride;
2142
  }
2143
  /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2144
  {
2145
    cairo_t           *c;
2146
    const oc_fragment *frags;
2147
    oc_mv             *frag_mvs;
2148
    const signed char *mb_modes;
2149
    oc_mb_map         *mb_maps;
2150
    size_t             nmbs;
2151
    size_t             mbi;
2152
    int                row2;
2153
    int                col2;
2154
    int                qim[3]={0,0,0};
2155
    if(_dec->state.nqis==2){
2156
      int bqi;
2157
      bqi=_dec->state.qis[0];
2158
      if(_dec->state.qis[1]>bqi)qim[1]=1;
2159
      if(_dec->state.qis[1]<bqi)qim[1]=-1;
2160
    }
2161
    if(_dec->state.nqis==3){
2162
      int bqi;
2163
      int cqi;
2164
      int dqi;
2165
      bqi=_dec->state.qis[0];
2166
      cqi=_dec->state.qis[1];
2167
      dqi=_dec->state.qis[2];
2168
      if(cqi>bqi&&dqi>bqi){
2169
        if(dqi>cqi){
2170
          qim[1]=1;
2171
          qim[2]=2;
2172
        }
2173
        else{
2174
          qim[1]=2;
2175
          qim[2]=1;
2176
        }
2177
      }
2178
      else if(cqi<bqi&&dqi<bqi){
2179
        if(dqi<cqi){
2180
          qim[1]=-1;
2181
          qim[2]=-2;
2182
        }
2183
        else{
2184
          qim[1]=-2;
2185
          qim[2]=-1;
2186
        }
2187
      }
2188
      else{
2189
        if(cqi<bqi)qim[1]=-1;
2190
        else qim[1]=1;
2191
        if(dqi<bqi)qim[2]=-1;
2192
        else qim[2]=1;
2193
      }
2194
    }
2195
    c=cairo_create(cs);
2196
    frags=_dec->state.frags;
2197
    frag_mvs=_dec->state.frag_mvs;
2198
    mb_modes=_dec->state.mb_modes;
2199
    mb_maps=_dec->state.mb_maps;
2200
    nmbs=_dec->state.nmbs;
2201
    row2=0;
2202
    col2=0;
2203
    for(mbi=0;mbi<nmbs;mbi++){
2204
      float x;
2205
      float y;
2206
      int   bi;
2207
      y=h-(row2+((col2+1>>1)&1))*16-16;
2208
      x=(col2>>1)*16;
2209
      cairo_set_line_width(c,1.);
2210
      /*Keyframe (all intra) red box.*/
2211
      if(_dec->state.frame_type==OC_INTRA_FRAME){
2212
        if(_dec->telemetry_mbmode&0x02){
2213
          cairo_set_source_rgba(c,1.,0,0,.5);
2214
          cairo_rectangle(c,x+2.5,y+2.5,11,11);
2215
          cairo_stroke_preserve(c);
2216
          cairo_set_source_rgba(c,1.,0,0,.25);
2217
          cairo_fill(c);
2218
        }
2219
      }
2220
      else{
2221
        ptrdiff_t fragi;
2222
        int       frag_mvx;
2223
        int       frag_mvy;
2224
        for(bi=0;bi<4;bi++){
2225
          fragi=mb_maps[mbi][0][bi];
2226
          if(fragi>=0&&frags[fragi].coded){
2227
            frag_mvx=OC_MV_X(frag_mvs[fragi]);
2228
            frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2229
            break;
2230
          }
2231
        }
2232
        if(bi<4){
2233
          switch(mb_modes[mbi]){
2234
            case OC_MODE_INTRA:{
2235
              if(_dec->telemetry_mbmode&0x02){
2236
                cairo_set_source_rgba(c,1.,0,0,.5);
2237
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2238
                cairo_stroke_preserve(c);
2239
                cairo_set_source_rgba(c,1.,0,0,.25);
2240
                cairo_fill(c);
2241
              }
2242
            }break;
2243
            case OC_MODE_INTER_NOMV:{
2244
              if(_dec->telemetry_mbmode&0x01){
2245
                cairo_set_source_rgba(c,0,0,1.,.5);
2246
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2247
                cairo_stroke_preserve(c);
2248
                cairo_set_source_rgba(c,0,0,1.,.25);
2249
                cairo_fill(c);
2250
              }
2251
            }break;
2252
            case OC_MODE_INTER_MV:{
2253
              if(_dec->telemetry_mbmode&0x04){
2254
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2255
                cairo_set_source_rgba(c,0,1.,0,.5);
2256
                cairo_stroke(c);
2257
              }
2258
              if(_dec->telemetry_mv&0x04){
2259
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2260
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2261
                cairo_set_line_width(c,3.);
2262
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2263
                cairo_stroke_preserve(c);
2264
                cairo_set_line_width(c,2.);
2265
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2266
                cairo_stroke_preserve(c);
2267
                cairo_set_line_width(c,1.);
2268
                cairo_line_to(c,x+8,y+8);
2269
                cairo_stroke(c);
2270
              }
2271
            }break;
2272
            case OC_MODE_INTER_MV_LAST:{
2273
              if(_dec->telemetry_mbmode&0x08){
2274
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2275
                cairo_set_source_rgba(c,0,1.,0,.5);
2276
                cairo_move_to(c,x+13.5,y+2.5);
2277
                cairo_line_to(c,x+2.5,y+8);
2278
                cairo_line_to(c,x+13.5,y+13.5);
2279
                cairo_stroke(c);
2280
              }
2281
              if(_dec->telemetry_mv&0x08){
2282
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2283
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2284
                cairo_set_line_width(c,3.);
2285
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2286
                cairo_stroke_preserve(c);
2287
                cairo_set_line_width(c,2.);
2288
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2289
                cairo_stroke_preserve(c);
2290
                cairo_set_line_width(c,1.);
2291
                cairo_line_to(c,x+8,y+8);
2292
                cairo_stroke(c);
2293
              }
2294
            }break;
2295
            case OC_MODE_INTER_MV_LAST2:{
2296
              if(_dec->telemetry_mbmode&0x10){
2297
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2298
                cairo_set_source_rgba(c,0,1.,0,.5);
2299
                cairo_move_to(c,x+8,y+2.5);
2300
                cairo_line_to(c,x+2.5,y+8);
2301
                cairo_line_to(c,x+8,y+13.5);
2302
                cairo_move_to(c,x+13.5,y+2.5);
2303
                cairo_line_to(c,x+8,y+8);
2304
                cairo_line_to(c,x+13.5,y+13.5);
2305
                cairo_stroke(c);
2306
              }
2307
              if(_dec->telemetry_mv&0x10){
2308
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2309
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2310
                cairo_set_line_width(c,3.);
2311
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2312
                cairo_stroke_preserve(c);
2313
                cairo_set_line_width(c,2.);
2314
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2315
                cairo_stroke_preserve(c);
2316
                cairo_set_line_width(c,1.);
2317
                cairo_line_to(c,x+8,y+8);
2318
                cairo_stroke(c);
2319
              }
2320
            }break;
2321
            case OC_MODE_GOLDEN_NOMV:{
2322
              if(_dec->telemetry_mbmode&0x20){
2323
                cairo_set_source_rgba(c,1.,1.,0,.5);
2324
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2325
                cairo_stroke_preserve(c);
2326
                cairo_set_source_rgba(c,1.,1.,0,.25);
2327
                cairo_fill(c);
2328
              }
2329
            }break;
2330
            case OC_MODE_GOLDEN_MV:{
2331
              if(_dec->telemetry_mbmode&0x40){
2332
                cairo_rectangle(c,x+2.5,y+2.5,11,11);
2333
                cairo_set_source_rgba(c,1.,1.,0,.5);
2334
                cairo_stroke(c);
2335
              }
2336
              if(_dec->telemetry_mv&0x40){
2337
                cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2338
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2339
                cairo_set_line_width(c,3.);
2340
                cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2341
                cairo_stroke_preserve(c);
2342
                cairo_set_line_width(c,2.);
2343
                cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2344
                cairo_stroke_preserve(c);
2345
                cairo_set_line_width(c,1.);
2346
                cairo_line_to(c,x+8,y+8);
2347
                cairo_stroke(c);
2348
              }
2349
            }break;
2350
            case OC_MODE_INTER_MV_FOUR:{
2351
              if(_dec->telemetry_mbmode&0x80){
2352
                cairo_rectangle(c,x+2.5,y+2.5,4,4);
2353
                cairo_rectangle(c,x+9.5,y+2.5,4,4);
2354
                cairo_rectangle(c,x+2.5,y+9.5,4,4);
2355
                cairo_rectangle(c,x+9.5,y+9.5,4,4);
2356
                cairo_set_source_rgba(c,0,1.,0,.5);
2357
                cairo_stroke(c);
2358
              }
2359
              /*4mv is odd, coded in raster order.*/
2360
              fragi=mb_maps[mbi][0][0];
2361
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2362
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2363
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2364
                cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2365
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2366
                cairo_set_line_width(c,3.);
2367
                cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2368
                cairo_stroke_preserve(c);
2369
                cairo_set_line_width(c,2.);
2370
                cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2371
                cairo_stroke_preserve(c);
2372
                cairo_set_line_width(c,1.);
2373
                cairo_line_to(c,x+4,y+12);
2374
                cairo_stroke(c);
2375
              }
2376
              fragi=mb_maps[mbi][0][1];
2377
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2378
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2379
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2380
                cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2381
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2382
                cairo_set_line_width(c,3.);
2383
                cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2384
                cairo_stroke_preserve(c);
2385
                cairo_set_line_width(c,2.);
2386
                cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2387
                cairo_stroke_preserve(c);
2388
                cairo_set_line_width(c,1.);
2389
                cairo_line_to(c,x+12,y+12);
2390
                cairo_stroke(c);
2391
              }
2392
              fragi=mb_maps[mbi][0][2];
2393
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2394
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2395
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2396
                cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2397
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2398
                cairo_set_line_width(c,3.);
2399
                cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2400
                cairo_stroke_preserve(c);
2401
                cairo_set_line_width(c,2.);
2402
                cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2403
                cairo_stroke_preserve(c);
2404
                cairo_set_line_width(c,1.);
2405
                cairo_line_to(c,x+4,y+4);
2406
                cairo_stroke(c);
2407
              }
2408
              fragi=mb_maps[mbi][0][3];
2409
              if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2410
                frag_mvx=OC_MV_X(frag_mvs[fragi]);
2411
                frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2412
                cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2413
                cairo_set_source_rgba(c,1.,1.,1.,.9);
2414
                cairo_set_line_width(c,3.);
2415
                cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2416
                cairo_stroke_preserve(c);
2417
                cairo_set_line_width(c,2.);
2418
                cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2419
                cairo_stroke_preserve(c);
2420
                cairo_set_line_width(c,1.);
2421
                cairo_line_to(c,x+12,y+4);
2422
                cairo_stroke(c);
2423
              }
2424
            }break;
2425
          }
2426
        }
2427
      }
2428
      /*qii illustration.*/
2429
      if(_dec->telemetry_qi&0x2){
2430
        cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2431
        for(bi=0;bi<4;bi++){
2432
          ptrdiff_t fragi;
2433
          int       qiv;
2434
          int       xp;
2435
          int       yp;
2436
          xp=x+(bi&1)*8;
2437
          yp=y+8-(bi&2)*4;
2438
          fragi=mb_maps[mbi][0][bi];
2439
          if(fragi>=0&&frags[fragi].coded){
2440
            qiv=qim[frags[fragi].qii];
2441
            cairo_set_line_width(c,3.);
2442
            cairo_set_source_rgba(c,0.,0.,0.,.5);
2443
            switch(qiv){
2444
              /*Double plus:*/
2445
              case 2:{
2446
                if((bi&1)^((bi&2)>>1)){
2447
                  cairo_move_to(c,xp+2.5,yp+1.5);
2448
                  cairo_line_to(c,xp+2.5,yp+3.5);
2449
                  cairo_move_to(c,xp+1.5,yp+2.5);
2450
                  cairo_line_to(c,xp+3.5,yp+2.5);
2451
                  cairo_move_to(c,xp+5.5,yp+4.5);
2452
                  cairo_line_to(c,xp+5.5,yp+6.5);
2453
                  cairo_move_to(c,xp+4.5,yp+5.5);
2454
                  cairo_line_to(c,xp+6.5,yp+5.5);
2455
                  cairo_stroke_preserve(c);
2456
                  cairo_set_source_rgba(c,0.,1.,1.,1.);
2457
                }
2458
                else{
2459
                  cairo_move_to(c,xp+5.5,yp+1.5);
2460
                  cairo_line_to(c,xp+5.5,yp+3.5);
2461
                  cairo_move_to(c,xp+4.5,yp+2.5);
2462
                  cairo_line_to(c,xp+6.5,yp+2.5);
2463
                  cairo_move_to(c,xp+2.5,yp+4.5);
2464
                  cairo_line_to(c,xp+2.5,yp+6.5);
2465
                  cairo_move_to(c,xp+1.5,yp+5.5);
2466
                  cairo_line_to(c,xp+3.5,yp+5.5);
2467
                  cairo_stroke_preserve(c);
2468
                  cairo_set_source_rgba(c,0.,1.,1.,1.);
2469
                }
2470
              }break;
2471
              /*Double minus:*/
2472
              case -2:{
2473
                cairo_move_to(c,xp+2.5,yp+2.5);
2474
                cairo_line_to(c,xp+5.5,yp+2.5);
2475
                cairo_move_to(c,xp+2.5,yp+5.5);
2476
                cairo_line_to(c,xp+5.5,yp+5.5);
2477
                cairo_stroke_preserve(c);
2478
                cairo_set_source_rgba(c,1.,1.,1.,1.);
2479
              }break;
2480
              /*Plus:*/
2481
              case 1:{
2482
                if((bi&2)==0)yp-=2;
2483
                if((bi&1)==0)xp-=2;
2484
                cairo_move_to(c,xp+4.5,yp+2.5);
2485
                cairo_line_to(c,xp+4.5,yp+6.5);
2486
                cairo_move_to(c,xp+2.5,yp+4.5);
2487
                cairo_line_to(c,xp+6.5,yp+4.5);
2488
                cairo_stroke_preserve(c);
2489
                cairo_set_source_rgba(c,.1,1.,.3,1.);
2490
                break;
2491
              }
2492
              /*Fall through.*/
2493
              /*Minus:*/
2494
              case -1:{
2495
                cairo_move_to(c,xp+2.5,yp+4.5);
2496
                cairo_line_to(c,xp+6.5,yp+4.5);
2497
                cairo_stroke_preserve(c);
2498
                cairo_set_source_rgba(c,1.,.3,.1,1.);
2499
              }break;
2500
              default:continue;
2501
            }
2502
            cairo_set_line_width(c,1.);
2503
            cairo_stroke(c);
2504
          }
2505
        }
2506
      }
2507
      col2++;
2508
      if((col2>>1)>=_dec->state.nhmbs){
2509
        col2=0;
2510
        row2+=2;
2511
      }
2512
    }
2513
    /*Bit usage indicator[s]:*/
2514
    if(_dec->telemetry_bits){
2515
      int widths[6];
2516
      int fpsn;
2517
      int fpsd;
2518
      int mult;
2519
      int fullw;
2520
      int padw;
2521
      int i;
2522
      fpsn=_dec->state.info.fps_numerator;
2523
      fpsd=_dec->state.info.fps_denominator;
2524
      mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2525
      fullw=250.f*h*fpsd*mult/fpsn;
2526
      padw=w-24;
2527
      /*Header and coded block bits.*/
2528
      if(_dec->telemetry_frame_bytes<0||
2529
       _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2530
        _dec->telemetry_frame_bytes=0;
2531
      }
2532
      if(_dec->telemetry_coding_bytes<0||
2533
       _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2534
        _dec->telemetry_coding_bytes=0;
2535
      }
2536
      if(_dec->telemetry_mode_bytes<0||
2537
       _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2538
        _dec->telemetry_mode_bytes=0;
2539
      }
2540
      if(_dec->telemetry_mv_bytes<0||
2541
       _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2542
        _dec->telemetry_mv_bytes=0;
2543
      }
2544
      if(_dec->telemetry_qi_bytes<0||
2545
       _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2546
        _dec->telemetry_qi_bytes=0;
2547
      }
2548
      if(_dec->telemetry_dc_bytes<0||
2549
       _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2550
        _dec->telemetry_dc_bytes=0;
2551
      }
2552
      widths[0]=padw*
2553
       (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2554
      widths[1]=padw*
2555
       (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2556
      widths[2]=padw*
2557
       (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2558
      widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2559
      widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2560
      widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2561
      for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2562
      cairo_set_source_rgba(c,.0,.0,.0,.6);
2563
      cairo_rectangle(c,10,h-33,widths[0]+1,5);
2564
      cairo_rectangle(c,10,h-29,widths[1]+1,5);
2565
      cairo_rectangle(c,10,h-25,widths[2]+1,5);
2566
      cairo_rectangle(c,10,h-21,widths[3]+1,5);
2567
      cairo_rectangle(c,10,h-17,widths[4]+1,5);
2568
      cairo_rectangle(c,10,h-13,widths[5]+1,5);
2569
      cairo_fill(c);
2570
      cairo_set_source_rgb(c,1,0,0);
2571
      cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2572
      cairo_fill(c);
2573
      cairo_set_source_rgb(c,0,1,0);
2574
      cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2575
      cairo_fill(c);
2576
      cairo_set_source_rgb(c,0,0,1);
2577
      cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2578
      cairo_fill(c);
2579
      cairo_set_source_rgb(c,.6,.4,.0);
2580
      cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2581
      cairo_fill(c);
2582
      cairo_set_source_rgb(c,.3,.3,.3);
2583
      cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2584
      cairo_fill(c);
2585
      cairo_set_source_rgb(c,.5,.5,.8);
2586
      cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2587
      cairo_fill(c);
2588
    }
2589
    /*Master qi indicator[s]:*/
2590
    if(_dec->telemetry_qi&0x1){
2591
      cairo_text_extents_t extents;
2592
      char                 buffer[10];
2593
      int                  p;
2594
      int                  y;
2595
      p=0;
2596
      y=h-7.5;
2597
      if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2598
      buffer[p++]=48+_dec->state.qis[0]%10;
2599
      if(_dec->state.nqis>=2){
2600
        buffer[p++]=' ';
2601
        if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2602
        buffer[p++]=48+_dec->state.qis[1]%10;
2603
      }
2604
      if(_dec->state.nqis==3){
2605
        buffer[p++]=' ';
2606
        if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2607
        buffer[p++]=48+_dec->state.qis[2]%10;
2608
      }
2609
      buffer[p++]='\0';
2610
      cairo_select_font_face(c,"sans",
2611
       CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2612
      cairo_set_font_size(c,18);
2613
      cairo_text_extents(c,buffer,&extents);
2614
      cairo_set_source_rgb(c,1,1,1);
2615
      cairo_move_to(c,w-extents.x_advance-10,y);
2616
      cairo_show_text(c,buffer);
2617
      cairo_set_source_rgb(c,0,0,0);
2618
      cairo_move_to(c,w-extents.x_advance-10,y);
2619
      cairo_text_path(c,buffer);
2620
      cairo_set_line_width(c,.8);
2621
      cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2622
      cairo_stroke(c);
2623
    }
2624
    cairo_destroy(c);
2625
  }
2626
  /*Out of the Cairo plane into the telemetry YUV buffer.*/
2627
  _ycbcr[0].data=_dec->telemetry_frame_data;
2628
  _ycbcr[0].stride=_ycbcr[0].width;
2629
  _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2630
  _ycbcr[1].stride=_ycbcr[1].width;
2631
  _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2632
  _ycbcr[2].stride=_ycbcr[2].width;
2633
  y_row=_ycbcr[0].data;
2634
  u_row=_ycbcr[1].data;
2635
  v_row=_ycbcr[2].data;
2636
  rgb_row=data;
2637
  /*This is one of the few places it's worth handling chroma on a
2638
     case-by-case basis.*/
2639
  switch(_dec->state.info.pixel_fmt){
2640
    case TH_PF_420:{
2641
      for(y=0;y<h;y+=2){
2642
        unsigned char *y_row2;
2643
        unsigned char *rgb_row2;
2644
        y_row2=y_row+_ycbcr[0].stride;
2645
        rgb_row2=rgb_row+cstride;
2646
        for(x=0;x<w;x+=2){
2647
          int y;
2648
          int u;
2649
          int v;
2650
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2651
           +24966*rgb_row[4*x+0]+4207500)/255000;
2652
          y_row[x]=OC_CLAMP255(y);
2653
          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2654
           +24966*rgb_row[4*x+4]+4207500)/255000;
2655
          y_row[x+1]=OC_CLAMP255(y);
2656
          y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2657
           +24966*rgb_row2[4*x+0]+4207500)/255000;
2658
          y_row2[x]=OC_CLAMP255(y);
2659
          y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2660
           +24966*rgb_row2[4*x+4]+4207500)/255000;
2661
          y_row2[x+1]=OC_CLAMP255(y);
2662
          u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2663
           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2664
           -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2665
           +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2666
           +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2667
           +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2668
          v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2669
           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2670
           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2671
            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2672
           -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2673
            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2674
          u_row[x>>1]=OC_CLAMP255(u);
2675
          v_row[x>>1]=OC_CLAMP255(v);
2676
        }
2677
        y_row+=_ycbcr[0].stride*2;
2678
        u_row+=_ycbcr[1].stride;
2679
        v_row+=_ycbcr[2].stride;
2680
        rgb_row+=cstride*2;
2681
      }
2682
    }break;
2683
    case TH_PF_422:{
2684
      for(y=0;y<h;y++){
2685
        for(x=0;x<w;x+=2){
2686
          int y;
2687
          int u;
2688
          int v;
2689
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2690
           +24966*rgb_row[4*x+0]+4207500)/255000;
2691
          y_row[x]=OC_CLAMP255(y);
2692
          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2693
           +24966*rgb_row[4*x+4]+4207500)/255000;
2694
          y_row[x+1]=OC_CLAMP255(y);
2695
          u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2696
           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2697
           +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2698
          v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2699
           -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2700
           -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2701
          u_row[x>>1]=OC_CLAMP255(u);
2702
          v_row[x>>1]=OC_CLAMP255(v);
2703
        }
2704
        y_row+=_ycbcr[0].stride;
2705
        u_row+=_ycbcr[1].stride;
2706
        v_row+=_ycbcr[2].stride;
2707
        rgb_row+=cstride;
2708
      }
2709
    }break;
2710
    /*case TH_PF_444:*/
2711
    default:{
2712
      for(y=0;y<h;y++){
2713
        for(x=0;x<w;x++){
2714
          int y;
2715
          int u;
2716
          int v;
2717
          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2718
           +24966*rgb_row[4*x+0]+4207500)/255000;
2719
          u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2720
           +99232*rgb_row[4*x+0]+29032005)/225930;
2721
          v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2722
           -25536*rgb_row[4*x+0]+45940035)/357510;
2723
          y_row[x]=OC_CLAMP255(y);
2724
          u_row[x]=OC_CLAMP255(u);
2725
          v_row[x]=OC_CLAMP255(v);
2726
        }
2727
        y_row+=_ycbcr[0].stride;
2728
        u_row+=_ycbcr[1].stride;
2729
        v_row+=_ycbcr[2].stride;
2730
        rgb_row+=cstride;
2731
      }
2732
    }break;
2733
  }
2734
  /*Finished.
2735
    Destroy the surface.*/
2736
  cairo_surface_destroy(cs);
2737
}
2738
#endif
2739
2740
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2741
1.23k
 ogg_int64_t *_granpos){
2742
1.23k
  int ret;
2743
1.23k
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
2744
  /*A completely empty packet indicates a dropped frame and is treated exactly
2745
     like an inter frame with no coded blocks.*/
2746
1.23k
  if(_op->bytes==0){
2747
97
    _dec->state.frame_type=OC_INTER_FRAME;
2748
97
    _dec->state.ntotal_coded_fragis=0;
2749
97
  }
2750
1.13k
  else{
2751
1.13k
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2752
1.13k
    ret=oc_dec_frame_header_unpack(_dec);
2753
1.13k
    if(ret<0)return ret;
2754
1.12k
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2755
711
    else oc_dec_coded_flags_unpack(_dec);
2756
1.12k
  }
2757
  /*If there have been no reference frames, and we need one, initialize one.*/
2758
1.22k
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2759
1.22k
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2760
808
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2761
472
    oc_dec_init_dummy_frame(_dec);
2762
472
  }
2763
  /*If this was an inter frame with no coded blocks...*/
2764
1.22k
  if(_dec->state.ntotal_coded_fragis<=0){
2765
    /*Just update the granule position and return.*/
2766
98
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2767
98
     _dec->state.info.keyframe_granule_shift)
2768
98
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
2769
98
    _dec->state.curframe_num++;
2770
98
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2771
98
    return TH_DUPFRAME;
2772
98
  }
2773
1.12k
  else{
2774
1.12k
    th_ycbcr_buffer stripe_buf;
2775
1.12k
    int             stripe_fragy;
2776
1.12k
    int             refi;
2777
1.12k
    int             pli;
2778
1.12k
    int             notstart;
2779
1.12k
    int             notdone;
2780
#ifdef HAVE_CAIRO
2781
    int             telemetry;
2782
    /*Save the current telemetry state.
2783
      This prevents it from being modified in the middle of decoding this
2784
       frame, which could cause us to skip calls to the striped decoding
2785
       callback.*/
2786
    telemetry=_dec->telemetry_mbmode||_dec->telemetry_mv||
2787
     _dec->telemetry_qi||_dec->telemetry_bits;
2788
#endif
2789
    /*Select a free buffer to use for the reconstructed version of this frame.*/
2790
2.08k
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2791
2.08k
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2792
1.12k
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2793
1.12k
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
2794
1.12k
     _dec->state.ref_frame_bufs[refi][0].data;
2795
#if defined(HAVE_CAIRO)
2796
    _dec->telemetry_frame_bytes=_op->bytes;
2797
#endif
2798
1.12k
    if(_dec->state.frame_type==OC_INTRA_FRAME){
2799
418
      _dec->state.keyframe_num=_dec->state.curframe_num;
2800
#if defined(HAVE_CAIRO)
2801
      _dec->telemetry_coding_bytes=
2802
       _dec->telemetry_mode_bytes=
2803
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2804
#endif
2805
418
    }
2806
710
    else{
2807
#if defined(HAVE_CAIRO)
2808
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2809
#endif
2810
710
      oc_dec_mb_modes_unpack(_dec);
2811
#if defined(HAVE_CAIRO)
2812
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2813
#endif
2814
710
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2815
#if defined(HAVE_CAIRO)
2816
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2817
#endif
2818
710
    }
2819
1.12k
    oc_dec_block_qis_unpack(_dec);
2820
#if defined(HAVE_CAIRO)
2821
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2822
#endif
2823
1.12k
    oc_dec_residual_tokens_unpack(_dec);
2824
    /*Update granule position.
2825
      This must be done before the striped decode callbacks so that the
2826
       application knows what to do with the frame data.*/
2827
1.12k
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2828
1.12k
     _dec->state.info.keyframe_granule_shift)
2829
1.12k
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
2830
1.12k
    _dec->state.curframe_num++;
2831
1.12k
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2832
    /*All of the rest of the operations -- DC prediction reversal,
2833
       reconstructing coded fragments, copying uncoded fragments, loop
2834
       filtering, extending borders, and out-of-loop post-processing -- should
2835
       be pipelined.
2836
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
2837
       copying are done for one or two super block rows, then loop filtering is
2838
       run as far as it can, then bordering copying, then post-processing.
2839
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2840
       block rows, and one chroma.
2841
      Otherwise, an MCU consists of one super block row from each plane.
2842
      Inside each MCU, we perform all of the steps on one color plane before
2843
       moving on to the next.
2844
      After reconstruction, the additional filtering stages introduce a delay
2845
       since they need some pixels from the next fragment row.
2846
      Thus the actual number of decoded rows available is slightly smaller for
2847
       the first MCU, and slightly larger for the last.
2848
2849
      This entire process allows us to operate on the data while it is still in
2850
       cache, resulting in big performance improvements.
2851
      An application callback allows further application processing (blitting
2852
       to video memory, color conversion, etc.) to also use the data while it's
2853
       in cache.*/
2854
1.12k
    oc_dec_pipeline_init(_dec,&_dec->pipe);
2855
1.12k
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2856
1.12k
    notstart=0;
2857
1.12k
    notdone=1;
2858
15.4k
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2859
14.3k
      int avail_fragy0;
2860
14.3k
      int avail_fragy_end;
2861
14.3k
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2862
14.3k
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2863
57.3k
      for(pli=0;pli<3;pli++){
2864
42.9k
        oc_fragment_plane *fplane;
2865
42.9k
        int                frag_shift;
2866
42.9k
        int                pp_offset;
2867
42.9k
        int                sdelay;
2868
42.9k
        int                edelay;
2869
42.9k
        fplane=_dec->state.fplanes+pli;
2870
        /*Compute the first and last fragment row of the current MCU for this
2871
           plane.*/
2872
42.9k
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2873
42.9k
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2874
42.9k
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2875
42.9k
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2876
42.9k
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2877
42.9k
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2878
42.9k
        sdelay=edelay=0;
2879
42.9k
        if(_dec->pipe.loop_filter){
2880
3.39k
          sdelay+=notstart;
2881
3.39k
          edelay+=notdone;
2882
3.39k
          oc_state_loop_filter_frag_rows(&_dec->state,
2883
3.39k
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
2884
3.39k
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2885
3.39k
        }
2886
        /*To fill the borders, we have an additional two pixel delay, since a
2887
           fragment in the next row could filter its top edge, using two pixels
2888
           from a fragment in this row.
2889
          But there's no reason to delay a full fragment between the two.*/
2890
42.9k
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
2891
42.9k
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2892
42.9k
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2893
        /*Out-of-loop post-processing.*/
2894
42.9k
        pp_offset=3*(pli!=0);
2895
42.9k
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2896
          /*Perform de-blocking in one plane.*/
2897
0
          sdelay+=notstart;
2898
0
          edelay+=notdone;
2899
0
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2900
0
           _dec->state.ref_frame_bufs[refi],pli,
2901
0
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2902
0
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2903
            /*Perform de-ringing in one plane.*/
2904
0
            sdelay+=notstart;
2905
0
            edelay+=notdone;
2906
0
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2907
0
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2908
0
          }
2909
0
        }
2910
        /*If no post-processing is done, we still need to delay a row for the
2911
           loop filter, thanks to the strange filtering order VP3 chose.*/
2912
42.9k
        else if(_dec->pipe.loop_filter){
2913
3.39k
          sdelay+=notstart;
2914
3.39k
          edelay+=notdone;
2915
3.39k
        }
2916
        /*Compute the intersection of the available rows in all planes.
2917
          If chroma is sub-sampled, the effect of each of its delays is
2918
           doubled, but luma might have more post-processing filters enabled
2919
           than chroma, so we don't know up front which one is the limiting
2920
           factor.*/
2921
42.9k
        avail_fragy0=OC_MINI(avail_fragy0,
2922
42.9k
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2923
42.9k
        avail_fragy_end=OC_MINI(avail_fragy_end,
2924
42.9k
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2925
42.9k
      }
2926
#ifdef HAVE_CAIRO
2927
      if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
2928
#else
2929
14.3k
      if(_dec->stripe_cb.stripe_decoded!=NULL){
2930
0
#endif
2931
        /*The callback might want to use the FPU, so let's make sure they can.
2932
          We violate all kinds of ABI restrictions by not doing this until
2933
           now, but none of them actually matter since we don't use floating
2934
           point ourselves.*/
2935
0
        oc_restore_fpu(&_dec->state);
2936
        /*Make the callback, ensuring we flip the sense of the "start" and
2937
           "end" of the available region upside down.*/
2938
0
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2939
0
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2940
0
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
2941
0
      }
2942
14.3k
      notstart=1;
2943
14.3k
    }
2944
    /*Finish filling in the reference frame borders.*/
2945
4.51k
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2946
    /*Update the reference frame indices.*/
2947
1.12k
    if(_dec->state.frame_type==OC_INTRA_FRAME){
2948
      /*The new frame becomes both the previous and gold reference frames.*/
2949
418
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2950
418
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2951
418
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
2952
418
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2953
418
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
2954
418
       _dec->state.ref_frame_data[OC_FRAME_SELF];
2955
418
    }
2956
710
    else{
2957
      /*Otherwise, just replace the previous reference frame.*/
2958
710
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2959
710
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
2960
710
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
2961
710
       _dec->state.ref_frame_data[OC_FRAME_SELF];
2962
710
    }
2963
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2964
       gamma values, if nothing else).*/
2965
1.12k
    oc_restore_fpu(&_dec->state);
2966
#ifdef HAVE_CAIRO
2967
    /*If telemetry ioctls are active, we need to draw to the output buffer.*/
2968
    if(telemetry){
2969
      oc_render_telemetry(_dec,stripe_buf,telemetry);
2970
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
2971
      /*If we had a striped decoding callback, we skipped calling it above
2972
         (because the telemetry wasn't rendered yet).
2973
        Call it now with the whole frame.*/
2974
      if(_dec->stripe_cb.stripe_decoded!=NULL){
2975
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
2976
         stripe_buf,0,_dec->state.fplanes[0].nvfrags);
2977
      }
2978
    }
2979
#endif
2980
#if defined(OC_DUMP_IMAGES)
2981
    /*We only dump images if there were some coded blocks.*/
2982
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2983
#endif
2984
1.12k
    return 0;
2985
1.12k
  }
2986
1.22k
}
2987
2988
1.12k
/*Fills in the caller's buffer description from the decoder's pp_frame_buf.
  _dec:   The decoder context.
  _ycbcr: The buffer description to fill in, via oc_ycbcr_buffer_flip().
  Return: 0 on success, or TH_EFAULT if _dec or _ycbcr is NULL.*/
int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
  if(_dec==NULL)return TH_EFAULT;
  if(_ycbcr==NULL)return TH_EFAULT;
  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
  return 0;
}