/src/theora/lib/enquant.c
Line | Count | Source (jump to first uncovered line) |
1 | | /******************************************************************** |
2 | | * * |
3 | | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | | * * |
8 | | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | | * by the Xiph.Org Foundation http://www.xiph.org/ * |
10 | | * * |
11 | | ******************************************************************** |
12 | | |
13 | | function: |
14 | | last mod: $Id$ |
15 | | |
16 | | ********************************************************************/ |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | #include "encint.h" |
20 | | |
21 | | |
22 | | |
23 | 3.44k | int oc_quant_params_clone(th_quant_info *_dst,const th_quant_info *_src){ |
24 | 3.44k | int i; |
25 | 3.44k | memcpy(_dst,_src,sizeof(*_dst)); |
26 | 3.44k | memset(_dst->qi_ranges,0,sizeof(_dst->qi_ranges)); |
27 | 24.1k | for(i=0;i<6;i++){ |
28 | 20.6k | int nranges; |
29 | 20.6k | int qti; |
30 | 20.6k | int pli; |
31 | 20.6k | int qtj; |
32 | 20.6k | int plj; |
33 | 20.6k | int pdup; |
34 | 20.6k | int qdup; |
35 | 20.6k | qti=i/3; |
36 | 20.6k | pli=i%3; |
37 | 20.6k | qtj=(i-1)/3; |
38 | 20.6k | plj=(i-1)%3; |
39 | 20.6k | nranges=_src->qi_ranges[qti][pli].nranges; |
40 | | /*Check for those duplicates that can be cleanly handled by |
41 | | oc_quant_params_clear().*/ |
42 | 20.6k | pdup=i>0&&nranges<=_src->qi_ranges[qtj][plj].nranges; |
43 | 20.6k | qdup=qti>0&&nranges<=_src->qi_ranges[0][pli].nranges; |
44 | 20.6k | _dst->qi_ranges[qti][pli].nranges=nranges; |
45 | 20.6k | if(pdup&&_src->qi_ranges[qti][pli].sizes==_src->qi_ranges[qtj][plj].sizes){ |
46 | 17.2k | _dst->qi_ranges[qti][pli].sizes=_dst->qi_ranges[qtj][plj].sizes; |
47 | 17.2k | } |
48 | 3.44k | else if(qdup&&_src->qi_ranges[1][pli].sizes==_src->qi_ranges[0][pli].sizes){ |
49 | 0 | _dst->qi_ranges[1][pli].sizes=_dst->qi_ranges[0][pli].sizes; |
50 | 0 | } |
51 | 3.44k | else{ |
52 | 3.44k | int *sizes; |
53 | 3.44k | sizes=(int *)_ogg_malloc(nranges*sizeof(*sizes)); |
54 | | /*Note: The caller is responsible for cleaning up any partially |
55 | | constructed qinfo.*/ |
56 | 3.44k | if(sizes==NULL)return TH_EFAULT; |
57 | 3.44k | memcpy(sizes,_src->qi_ranges[qti][pli].sizes,nranges*sizeof(*sizes)); |
58 | 3.44k | _dst->qi_ranges[qti][pli].sizes=sizes; |
59 | 3.44k | } |
60 | 20.6k | if(pdup&&_src->qi_ranges[qti][pli].base_matrices== |
61 | 17.2k | _src->qi_ranges[qtj][plj].base_matrices){ |
62 | 10.3k | _dst->qi_ranges[qti][pli].base_matrices= |
63 | 10.3k | _dst->qi_ranges[qtj][plj].base_matrices; |
64 | 10.3k | } |
65 | 10.3k | else if(qdup&&_src->qi_ranges[1][pli].base_matrices== |
66 | 3.44k | _src->qi_ranges[0][pli].base_matrices){ |
67 | 0 | _dst->qi_ranges[1][pli].base_matrices= |
68 | 0 | _dst->qi_ranges[0][pli].base_matrices; |
69 | 0 | } |
70 | 10.3k | else{ |
71 | 10.3k | th_quant_base *base_matrices; |
72 | 10.3k | base_matrices=(th_quant_base *)_ogg_malloc( |
73 | 10.3k | (nranges+1)*sizeof(*base_matrices)); |
74 | | /*Note: The caller is responsible for cleaning up any partially |
75 | | constructed qinfo.*/ |
76 | 10.3k | if(base_matrices==NULL)return TH_EFAULT; |
77 | 10.3k | memcpy(base_matrices,_src->qi_ranges[qti][pli].base_matrices, |
78 | 10.3k | (nranges+1)*sizeof(*base_matrices)); |
79 | 10.3k | _dst->qi_ranges[qti][pli].base_matrices= |
80 | 10.3k | (const th_quant_base *)base_matrices; |
81 | 10.3k | } |
82 | 20.6k | } |
83 | 3.44k | return 0; |
84 | 3.44k | } |
85 | | |
86 | 3.44k | void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ |
87 | 3.44k | const th_quant_ranges *qranges; |
88 | 3.44k | const th_quant_base *base_mats[2*3*64]; |
89 | 3.44k | int indices[2][3][64]; |
90 | 3.44k | int nbase_mats; |
91 | 3.44k | int nbits; |
92 | 3.44k | int ci; |
93 | 3.44k | int qi; |
94 | 3.44k | int qri; |
95 | 3.44k | int qti; |
96 | 3.44k | int pli; |
97 | 3.44k | int qtj; |
98 | 3.44k | int plj; |
99 | 3.44k | int bmi; |
100 | 3.44k | int i; |
101 | 3.44k | i=_qinfo->loop_filter_limits[0]; |
102 | 220k | for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); |
103 | 3.44k | nbits=OC_ILOG_32(i); |
104 | 3.44k | oggpackB_write(_opb,nbits,3); |
105 | 223k | for(qi=0;qi<64;qi++){ |
106 | 220k | oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); |
107 | 220k | } |
108 | | /*580 bits for VP3.*/ |
109 | 3.44k | i=1; |
110 | 223k | for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i); |
111 | 3.44k | nbits=OC_ILOGNZ_32(i); |
112 | 3.44k | oggpackB_write(_opb,nbits-1,4); |
113 | 223k | for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); |
114 | | /*516 bits for VP3.*/ |
115 | 3.44k | i=1; |
116 | 223k | for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i); |
117 | 3.44k | nbits=OC_ILOGNZ_32(i); |
118 | 3.44k | oggpackB_write(_opb,nbits-1,4); |
119 | 223k | for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); |
120 | | /*Consolidate any duplicate base matrices.*/ |
121 | 3.44k | nbase_mats=0; |
122 | 27.5k | for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ |
123 | 20.6k | qranges=_qinfo->qi_ranges[qti]+pli; |
124 | 103k | for(qri=0;qri<=qranges->nranges;qri++){ |
125 | 647k | for(bmi=0;;bmi++){ |
126 | 647k | if(bmi>=nbase_mats){ |
127 | 41.3k | base_mats[bmi]=qranges->base_matrices+qri; |
128 | 41.3k | indices[qti][pli][qri]=nbase_mats++; |
129 | 41.3k | break; |
130 | 41.3k | } |
131 | 606k | else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], |
132 | 606k | sizeof(base_mats[bmi][0]))==0){ |
133 | 41.3k | indices[qti][pli][qri]=bmi; |
134 | 41.3k | break; |
135 | 41.3k | } |
136 | 647k | } |
137 | 82.7k | } |
138 | 20.6k | } |
139 | | /*Write out the list of unique base matrices. |
140 | | 1545 bits for VP3 matrices.*/ |
141 | 3.44k | oggpackB_write(_opb,nbase_mats-1,9); |
142 | 44.7k | for(bmi=0;bmi<nbase_mats;bmi++){ |
143 | 2.68M | for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8); |
144 | 41.3k | } |
145 | | /*Now store quant ranges and their associated indices into the base matrix |
146 | | list. |
147 | | 46 bits for VP3 matrices.*/ |
148 | 3.44k | nbits=OC_ILOG_32(nbase_mats-1); |
149 | 24.1k | for(i=0;i<6;i++){ |
150 | 20.6k | qti=i/3; |
151 | 20.6k | pli=i%3; |
152 | 20.6k | qranges=_qinfo->qi_ranges[qti]+pli; |
153 | 20.6k | if(i>0){ |
154 | 17.2k | if(qti>0){ |
155 | 10.3k | if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& |
156 | 10.3k | memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, |
157 | 10.3k | qranges->nranges*sizeof(qranges->sizes[0]))==0&& |
158 | 10.3k | memcmp(indices[qti][pli],indices[qti-1][pli], |
159 | 10.3k | (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ |
160 | 0 | oggpackB_write(_opb,1,2); |
161 | 0 | continue; |
162 | 0 | } |
163 | 10.3k | } |
164 | 17.2k | qtj=(i-1)/3; |
165 | 17.2k | plj=(i-1)%3; |
166 | 17.2k | if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& |
167 | 17.2k | memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, |
168 | 17.2k | qranges->nranges*sizeof(qranges->sizes[0]))==0&& |
169 | 17.2k | memcmp(indices[qti][pli],indices[qtj][plj], |
170 | 17.2k | (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ |
171 | 10.3k | oggpackB_write(_opb,0,1+(qti>0)); |
172 | 10.3k | continue; |
173 | 10.3k | } |
174 | 6.89k | oggpackB_write(_opb,1,1); |
175 | 6.89k | } |
176 | 10.3k | oggpackB_write(_opb,indices[qti][pli][0],nbits); |
177 | 41.3k | for(qi=qri=0;qi<63;qri++){ |
178 | 31.0k | oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi)); |
179 | 31.0k | qi+=qranges->sizes[qri]; |
180 | 31.0k | oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); |
181 | 31.0k | } |
182 | 10.3k | } |
183 | 3.44k | } |
184 | | |
185 | 84.6M | void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ |
186 | 84.6M | ogg_uint32_t t; |
187 | 84.6M | int l; |
188 | 84.6M | _d<<=1; |
189 | 84.6M | l=OC_ILOGNZ_32(_d)-1; |
190 | 84.6M | t=1+((ogg_uint32_t)1<<16+l)/_d; |
191 | 84.6M | _this->m=(ogg_int16_t)(t-0x10000); |
192 | 84.6M | _this->l=l; |
193 | 84.6M | } |
194 | | |
195 | | void oc_enc_enquant_table_init_c(void *_enquant, |
196 | 0 | const ogg_uint16_t _dequant[64]){ |
197 | 0 | oc_iquant *enquant; |
198 | 0 | int zzi; |
199 | | /*In the original VP3.2 code, the rounding offset and the size of the |
200 | | dead zone around 0 were controlled by a "sharpness" parameter. |
201 | | We now R-D optimize the tokens for each block after quantization, |
202 | | so the rounding offset should always be 1/2, and an explicit dead |
203 | | zone is unnecessary. |
204 | | Hence, all of that VP3.2 code is gone from here, and the remaining |
205 | | floating point code has been implemented as equivalent integer |
206 | | code with exact precision.*/ |
207 | 0 | enquant=(oc_iquant *)_enquant; |
208 | 0 | for(zzi=0;zzi<64;zzi++)oc_iquant_init(enquant+zzi,_dequant[zzi]); |
209 | 0 | } |
210 | | |
211 | 0 | void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis){ |
212 | 0 | int pli; |
213 | 0 | int qii; |
214 | 0 | int qti; |
215 | 0 | for(pli=0;pli<3;pli++)for(qii=1;qii<_nqis;qii++)for(qti=0;qti<2;qti++){ |
216 | 0 | *((oc_iquant *)_enquant[pli][qii][qti])= |
217 | 0 | *((oc_iquant *)_enquant[pli][0][qti]); |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | | int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64], |
222 | 0 | const ogg_uint16_t _dequant[64],const void *_enquant){ |
223 | 0 | const oc_iquant *enquant; |
224 | 0 | int nonzero; |
225 | 0 | int zzi; |
226 | 0 | int val; |
227 | 0 | int d; |
228 | 0 | int s; |
229 | 0 | enquant=(const oc_iquant *)_enquant; |
230 | 0 | nonzero=0; |
231 | 0 | for(zzi=0;zzi<64;zzi++){ |
232 | 0 | val=_dct[zzi]; |
233 | 0 | d=_dequant[zzi]; |
234 | 0 | val=val<<1; |
235 | 0 | if(abs(val)>=d){ |
236 | 0 | s=OC_SIGNMASK(val); |
237 | | /*The bias added here rounds ties away from zero, since token |
238 | | optimization can only decrease the magnitude of the quantized |
239 | | value.*/ |
240 | 0 | val+=d+s^s; |
241 | | /*Note the arithmetic right shift is not guaranteed by ANSI C. |
242 | | Hopefully no one still uses ones-complement architectures.*/ |
243 | 0 | val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; |
244 | 0 | _qdct[zzi]=(ogg_int16_t)val; |
245 | 0 | nonzero=zzi; |
246 | 0 | } |
247 | 0 | else _qdct[zzi]=0; |
248 | 0 | } |
249 | 0 | return nonzero; |
250 | 0 | } |
251 | | |
252 | | |
253 | | |
254 | | /*This table gives the square root of the fraction of the squared magnitude of |
255 | | each DCT coefficient relative to the total, scaled by 2**16, for both INTRA |
256 | | and INTER modes. |
257 | | These values were measured after motion-compensated prediction, before |
258 | | quantization, over a large set of test video (from QCIF to 1080p) encoded at |
259 | | all possible rates. |
260 | | The DC coefficient takes into account the DPCM prediction (using the |
261 | | quantized values from neighboring blocks, as the encoder does, but still |
262 | | before quantization of the coefficient in the current block). |
263 | | The results differ significantly from the expected variance (e.g., using an |
264 | | AR(1) model of the signal with rho=0.95, as is frequently done to compute |
265 | | the coding gain of the DCT). |
266 | | We use them to estimate an "average" quantizer for a given quantizer matrix, |
267 | | as this is used to parameterize a number of the rate control decisions. |
268 | | These values are themselves probably quantizer-matrix dependent, since the |
269 | | shape of the matrix affects the noise distribution in the reference frames, |
270 | | but they should at least give us _some_ amount of adaptivity to different |
271 | | matrices, as opposed to hard-coding a table of average Q values for the |
272 | | current set. |
273 | | The main features they capture are that a) only a few of the quantizers in |
274 | | the upper-left corner contribute anything significant at all (though INTER |
275 | | mode is significantly flatter) and b) the DPCM prediction of the DC |
276 | | coefficient gives a very minor improvement in the INTRA case and a quite |
277 | | significant one in the INTER case (over the expected variance).*/ |
278 | | static const ogg_uint16_t OC_RPSD[2][64]={ |
279 | | { |
280 | | 52725,17370,10399, 6867, 5115, 3798, 2942, 2076, |
281 | | 17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619, |
282 | | 10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461, |
283 | | 6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243, |
284 | | 5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091, |
285 | | 3798, 2869, 2573, 2164, 1884, 1495, 1212, 873, |
286 | | 2942, 2229, 2015, 1718, 1530, 1212, 1001, 704, |
287 | | 2076, 1619, 1461, 1243, 1091, 873, 704, 474 |
288 | | }, |
289 | | { |
290 | | 23411,15604,13529,11601,10683, 8958, 7840, 6142, |
291 | | 15604,11901,10718, 9108, 8290, 6961, 6023, 4487, |
292 | | 13529,10718, 9961, 8527, 7945, 6689, 5742, 4333, |
293 | | 11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743, |
294 | | 10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504, |
295 | | 8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989, |
296 | | 7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558, |
297 | | 6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829 |
298 | | } |
299 | | }; |
300 | | |
301 | | /*The fraction of the squared magnitude of the residuals in each color channel |
302 | | relative to the total, scaled by 2**16, for each pixel format. |
303 | | These values were measured after motion-compensated prediction, before |
304 | | quantization, over a large set of test video encoded at all possible rates. |
305 | | TODO: These values are only from INTER frames; they should be re-measured for |
306 | | INTRA frames.*/ |
307 | | static const ogg_uint16_t OC_PCD[4][3]={ |
308 | | {59926, 3038, 2572}, |
309 | | {55201, 5597, 4738}, |
310 | | {55201, 5597, 4738}, |
311 | | {47682, 9669, 8185} |
312 | | }; |
313 | | |
314 | | |
315 | | /*Compute "average" quantizers for each qi level to use for rate control. |
316 | | We do one for each color channel, as well as an average across color |
317 | | channels, separately for INTER and INTRA, since their behavior is very |
318 | | different. |
319 | | The basic approach is to compute a harmonic average of the squared quantizer, |
320 | | weighted by the expected squared magnitude of the DCT coefficients. |
321 | | Under the (not quite true) assumption that DCT coefficients are |
322 | | Laplacian-distributed, this preserves the product Q*lambda, where |
323 | | lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be |
324 | | confused with the lambda used in R-D optimization throughout most of the |
325 | | rest of the code), when the distributions from multiple coefficients are |
326 | | pooled. |
327 | | The value Q*lambda completely determines the entropy of coefficients drawn |
328 | | from a Laplacian distribution, and thus the expected bitrate.*/ |
329 | | void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], |
330 | | ogg_int16_t _log_plq[64][3][2],ogg_uint16_t _chroma_rd_scale[2][64][2], |
331 | 3.44k | ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){ |
332 | 3.44k | int qi; |
333 | 3.44k | int pli; |
334 | 3.44k | int qti; |
335 | 3.44k | int ci; |
336 | 447k | for(qti=0;qti<2;qti++)for(qi=0;qi<64;qi++){ |
337 | 441k | ogg_int64_t q2; |
338 | 441k | ogg_uint32_t qp[3]; |
339 | 441k | ogg_uint32_t cqp; |
340 | 441k | ogg_uint32_t d; |
341 | 441k | q2=0; |
342 | 1.76M | for(pli=0;pli<3;pli++){ |
343 | 1.32M | qp[pli]=0; |
344 | 86.0M | for(ci=0;ci<64;ci++){ |
345 | 84.6M | unsigned rq; |
346 | 84.6M | unsigned qd; |
347 | 84.6M | qd=_dequant[qi][pli][qti][OC_IZIG_ZAG[ci]]; |
348 | 84.6M | rq=(OC_RPSD[qti][ci]+(qd>>1))/qd; |
349 | 84.6M | qp[pli]+=rq*(ogg_uint32_t)rq; |
350 | 84.6M | } |
351 | 1.32M | q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp[pli]; |
352 | | /*plq=1.0/sqrt(qp)*/ |
353 | 1.32M | _log_plq[qi][pli][qti]= |
354 | 1.32M | (ogg_int16_t)(OC_Q10(32)-oc_blog32_q10(qp[pli])>>1); |
355 | 1.32M | } |
356 | 441k | d=OC_PCD[_pixel_fmt][1]+OC_PCD[_pixel_fmt][2]; |
357 | 441k | cqp=(ogg_uint32_t)((OC_PCD[_pixel_fmt][1]*(ogg_int64_t)qp[1]+ |
358 | 441k | OC_PCD[_pixel_fmt][2]*(ogg_int64_t)qp[2]+(d>>1))/d); |
359 | | /*chroma_rd_scale=clamp(0.25,cqp/qp[0],4)*/ |
360 | 441k | d=OC_MAXI(qp[0]+(1<<OC_RD_SCALE_BITS-1)>>OC_RD_SCALE_BITS,1); |
361 | 441k | d=OC_CLAMPI(1<<OC_RD_SCALE_BITS-2,(cqp+(d>>1))/d,4<<OC_RD_SCALE_BITS); |
362 | 441k | _chroma_rd_scale[qti][qi][0]=(ogg_int16_t)d; |
363 | | /*chroma_rd_iscale=clamp(0.25,qp[0]/cqp,4)*/ |
364 | 441k | d=OC_MAXI(OC_RD_ISCALE(cqp,1),1); |
365 | 441k | d=OC_CLAMPI(1<<OC_RD_ISCALE_BITS-2,(qp[0]+(d>>1))/d,4<<OC_RD_ISCALE_BITS); |
366 | 441k | _chroma_rd_scale[qti][qi][1]=(ogg_int16_t)d; |
367 | | /*qavg=1.0/sqrt(q2).*/ |
368 | 441k | _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; |
369 | 441k | } |
370 | 3.44k | } |