/src/theora/lib/enquant.c
Line | Count | Source (jump to first uncovered line) |
1 | | /******************************************************************** |
2 | | * * |
3 | | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | | * * |
8 | | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | | * by the Xiph.Org Foundation https://www.xiph.org/ * |
10 | | * * |
11 | | ******************************************************************** |
12 | | |
13 | | function: |
14 | | |
15 | | ********************************************************************/ |
16 | | #include <stdlib.h> |
17 | | #include <string.h> |
18 | | #include "encint.h" |
19 | | |
20 | | |
21 | | |
/*Copies *_src into *_dst, giving _dst its own heap-allocated copies of the
   sizes and base_matrices arrays referenced from qi_ranges.
  The six qi_ranges entries are visited in the order i=3*qti+pli.
  Where _src shares an array pointer between an entry and the entry visited
   just before it, or between qi_ranges[1][pli] and qi_ranges[0][pli], _dst
   shares its own copy in the same way instead of allocating a second one.
  _dst: The structure to fill in; its previous contents are overwritten.
  _src: The structure to copy from.
  Return: 0 on success, or TH_EFAULT if an allocation fails, in which case
   _dst is left partially constructed and the caller must clean it up.*/
22 | 3.41k | int oc_quant_params_clone(th_quant_info *_dst,const th_quant_info *_src){ |
23 | 3.41k | int i; |
24 | 3.41k | memcpy(_dst,_src,sizeof(*_dst)); |
/*The memcpy() above also copied _src's array pointers.
  Zero them so that any entry not yet filled in below holds no pointer into
   _src.*/
25 | 3.41k | memset(_dst->qi_ranges,0,sizeof(_dst->qi_ranges)); |
26 | 23.9k | for(i=0;i<6;i++){ |
27 | 20.5k | int nranges; |
28 | 20.5k | int qti; |
29 | 20.5k | int pli; |
30 | 20.5k | int qtj; |
31 | 20.5k | int plj; |
32 | 20.5k | int pdup; |
33 | 20.5k | int qdup; |
/*(qti,pli) is the current entry and (qtj,plj) is the one visited before it.
  For i==0, plj evaluates to -1, but it is never used as an index: pdup is 0
   in that case and every use of (qtj,plj) below is guarded by pdup&&... .*/
34 | 20.5k | qti=i/3; |
35 | 20.5k | pli=i%3; |
36 | 20.5k | qtj=(i-1)/3; |
37 | 20.5k | plj=(i-1)%3; |
38 | 20.5k | nranges=_src->qi_ranges[qti][pli].nranges; |
39 | | /*Check for those duplicates that can be cleanly handled by |
40 | | oc_quant_params_clear().*/ |
41 | 20.5k | pdup=i>0&&nranges<=_src->qi_ranges[qtj][plj].nranges; |
42 | 20.5k | qdup=qti>0&&nranges<=_src->qi_ranges[0][pli].nranges; |
43 | 20.5k | _dst->qi_ranges[qti][pli].nranges=nranges; |
/*The sizes array: share with the previous entry, share with the qti==0 entry
   of the same plane, or allocate and copy nranges values.*/
44 | 20.5k | if(pdup&&_src->qi_ranges[qti][pli].sizes==_src->qi_ranges[qtj][plj].sizes){ |
45 | 17.0k | _dst->qi_ranges[qti][pli].sizes=_dst->qi_ranges[qtj][plj].sizes; |
46 | 17.0k | } |
47 | 3.41k | else if(qdup&&_src->qi_ranges[1][pli].sizes==_src->qi_ranges[0][pli].sizes){ |
48 | 0 | _dst->qi_ranges[1][pli].sizes=_dst->qi_ranges[0][pli].sizes; |
49 | 0 | } |
50 | 3.41k | else{ |
51 | 3.41k | int *sizes; |
52 | 3.41k | sizes=(int *)_ogg_malloc(nranges*sizeof(*sizes)); |
53 | | /*Note: The caller is responsible for cleaning up any partially |
54 | | constructed qinfo.*/ |
55 | 3.41k | if(sizes==NULL)return TH_EFAULT; |
56 | 3.41k | memcpy(sizes,_src->qi_ranges[qti][pli].sizes,nranges*sizeof(*sizes)); |
57 | 3.41k | _dst->qi_ranges[qti][pli].sizes=sizes; |
58 | 3.41k | } |
/*The base_matrices array: the same three cases as for sizes, except that
   nranges+1 matrices are copied.*/
59 | 20.5k | if(pdup&&_src->qi_ranges[qti][pli].base_matrices== |
60 | 17.0k | _src->qi_ranges[qtj][plj].base_matrices){ |
61 | 10.2k | _dst->qi_ranges[qti][pli].base_matrices= |
62 | 10.2k | _dst->qi_ranges[qtj][plj].base_matrices; |
63 | 10.2k | } |
64 | 10.2k | else if(qdup&&_src->qi_ranges[1][pli].base_matrices== |
65 | 3.41k | _src->qi_ranges[0][pli].base_matrices){ |
66 | 0 | _dst->qi_ranges[1][pli].base_matrices= |
67 | 0 | _dst->qi_ranges[0][pli].base_matrices; |
68 | 0 | } |
69 | 10.2k | else{ |
70 | 10.2k | th_quant_base *base_matrices; |
71 | 10.2k | base_matrices=(th_quant_base *)_ogg_malloc( |
72 | 10.2k | (nranges+1)*sizeof(*base_matrices)); |
73 | | /*Note: The caller is responsible for cleaning up any partially |
74 | | constructed qinfo.*/ |
75 | 10.2k | if(base_matrices==NULL)return TH_EFAULT; |
76 | 10.2k | memcpy(base_matrices,_src->qi_ranges[qti][pli].base_matrices, |
77 | 10.2k | (nranges+1)*sizeof(*base_matrices)); |
78 | 10.2k | _dst->qi_ranges[qti][pli].base_matrices= |
79 | 10.2k | (const th_quant_base *)base_matrices; |
80 | 10.2k | } |
81 | 20.5k | } |
82 | 3.41k | return 0; |
83 | 3.41k | } |
84 | | |
/*Writes the quantization parameters in _qinfo to the bit packer _opb.
  Five sections are written in order: the loop filter limits, the AC scale
   table, the DC scale table, the list of unique base matrices, and, for each
   of the six (qti,pli) entries, either a "same as an earlier entry" flag or
   the explicit range sizes with their base matrix indices.
  _opb:   The buffer to write to.
  _qinfo: The parameters to write; it is not modified.*/
85 | 3.41k | void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ |
86 | 3.41k | const th_quant_ranges *qranges; |
87 | 3.41k | const th_quant_base *base_mats[2*3*64]; |
88 | 3.41k | int indices[2][3][64]; |
89 | 3.41k | int nbase_mats; |
90 | 3.41k | int nbits; |
91 | 3.41k | int ci; |
92 | 3.41k | int qi; |
93 | 3.41k | int qri; |
94 | 3.41k | int qti; |
95 | 3.41k | int pli; |
96 | 3.41k | int qtj; |
97 | 3.41k | int plj; |
98 | 3.41k | int bmi; |
99 | 3.41k | int i; |
/*Loop filter limits: a field width stored in 3 bits, then all 64 limits at
   that width.
  The width is OC_ILOG_32() of the value accumulated with OC_MAXI() over the
   table.*/
100 | 3.41k | i=_qinfo->loop_filter_limits[0]; |
101 | 218k | for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); |
102 | 3.41k | nbits=OC_ILOG_32(i); |
103 | 3.41k | oggpackB_write(_opb,nbits,3); |
104 | 222k | for(qi=0;qi<64;qi++){ |
105 | 218k | oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); |
106 | 218k | } |
107 | | /*580 bits for VP3.*/ |
/*AC scale table: the width minus one is stored in 4 bits, then all 64 values
   at that width.
  The accumulator is seeded with 1 rather than with a table entry before it is
   passed to OC_ILOGNZ_32().*/
108 | 3.41k | i=1; |
109 | 222k | for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i); |
110 | 3.41k | nbits=OC_ILOGNZ_32(i); |
111 | 3.41k | oggpackB_write(_opb,nbits-1,4); |
112 | 222k | for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); |
113 | | /*516 bits for VP3.*/ |
/*DC scale table: written the same way as the AC scale table.*/
114 | 3.41k | i=1; |
115 | 222k | for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i); |
116 | 3.41k | nbits=OC_ILOGNZ_32(i); |
117 | 3.41k | oggpackB_write(_opb,nbits-1,4); |
118 | 222k | for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); |
119 | | /*Consolidate any duplicate base matrices.*/ |
/*Each of the nranges+1 matrices of every entry is compared with memcmp()
   against the matrices collected so far.
  The first match supplies the index stored in indices[qti][pli][qri]; if
   there is no match, the matrix is appended to base_mats.*/
120 | 3.41k | nbase_mats=0; |
121 | 27.3k | for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ |
122 | 20.5k | qranges=_qinfo->qi_ranges[qti]+pli; |
123 | 102k | for(qri=0;qri<=qranges->nranges;qri++){ |
124 | 642k | for(bmi=0;;bmi++){ |
125 | 642k | if(bmi>=nbase_mats){ |
126 | 41.0k | base_mats[bmi]=qranges->base_matrices+qri; |
127 | 41.0k | indices[qti][pli][qri]=nbase_mats++; |
128 | 41.0k | break; |
129 | 41.0k | } |
130 | 601k | else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], |
131 | 601k | sizeof(base_mats[bmi][0]))==0){ |
132 | 41.0k | indices[qti][pli][qri]=bmi; |
133 | 41.0k | break; |
134 | 41.0k | } |
135 | 642k | } |
136 | 82.0k | } |
137 | 20.5k | } |
138 | | /*Write out the list of unique base matrices. |
139 | | 1545 bits for VP3 matrices.*/ |
/*The count minus one is stored in 9 bits, then 64 values of 8 bits each per
   matrix.*/
140 | 3.41k | oggpackB_write(_opb,nbase_mats-1,9); |
141 | 44.4k | for(bmi=0;bmi<nbase_mats;bmi++){ |
142 | 2.66M | for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8); |
143 | 41.0k | } |
144 | | /*Now store quant ranges and their associated indices into the base matrix |
145 | | list. |
146 | | 46 bits for VP3 matrices.*/ |
/*Every base matrix index below is written with nbits bits.
  For each entry after the first, the code emitted is one of:
   - the value 1 in 2 bits, if qti>0 and the entry has the same nranges, sizes
     and indices as the entry for the same plane with qti-1;
   - the value 0 in 1 bit (2 bits if qti>0), if it has the same nranges, sizes
     and indices as the entry visited just before it;
   - the value 1 in 1 bit, followed by the explicit ranges.
  The first entry (i==0) always has its ranges written explicitly, with no
   flag.*/
147 | 3.41k | nbits=OC_ILOG_32(nbase_mats-1); |
148 | 23.9k | for(i=0;i<6;i++){ |
149 | 20.5k | qti=i/3; |
150 | 20.5k | pli=i%3; |
151 | 20.5k | qranges=_qinfo->qi_ranges[qti]+pli; |
152 | 20.5k | if(i>0){ |
153 | 17.0k | if(qti>0){ |
154 | 10.2k | if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& |
155 | 10.2k | memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, |
156 | 10.2k | qranges->nranges*sizeof(qranges->sizes[0]))==0&& |
157 | 10.2k | memcmp(indices[qti][pli],indices[qti-1][pli], |
158 | 10.2k | (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ |
159 | 0 | oggpackB_write(_opb,1,2); |
160 | 0 | continue; |
161 | 0 | } |
162 | 10.2k | } |
163 | 17.0k | qtj=(i-1)/3; |
164 | 17.0k | plj=(i-1)%3; |
165 | 17.0k | if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& |
166 | 17.0k | memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, |
167 | 17.0k | qranges->nranges*sizeof(qranges->sizes[0]))==0&& |
168 | 17.0k | memcmp(indices[qti][pli],indices[qtj][plj], |
169 | 17.0k | (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ |
170 | 10.2k | oggpackB_write(_opb,0,1+(qti>0)); |
171 | 10.2k | continue; |
172 | 10.2k | } |
173 | 6.83k | oggpackB_write(_opb,1,1); |
174 | 6.83k | } |
/*Explicit form: the first matrix index, then (size-1, next matrix index)
   pairs until the sizes consumed reach 63.
  Each size-1 is written with OC_ILOG_32(62-qi) bits, where qi is the total of
   the sizes already written.*/
175 | 10.2k | oggpackB_write(_opb,indices[qti][pli][0],nbits); |
176 | 41.0k | for(qi=qri=0;qi<63;qri++){ |
177 | 30.7k | oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi)); |
178 | 30.7k | qi+=qranges->sizes[qri]; |
179 | 30.7k | oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); |
180 | 30.7k | } |
181 | 10.2k | } |
182 | 3.41k | } |
183 | | |
/*Fills in *_this with the multiplier m and shift l computed from 2*_d.
  oc_enc_quantize_c() later applies them to a value v as
   ((m*v>>16)+v)>>l.
  _this: The oc_iquant to initialize.
  _d:    The dequantizer value.
         NOTE(review): _d is used as a divisor after doubling, so it must be
          nonzero; presumably it must also be small enough that 2*_d still
          fits in ogg_uint16_t -- confirm against the callers.*/
184 | 83.9M | void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ |
185 | 83.9M | ogg_uint32_t t; |
186 | 83.9M | int l; |
187 | 83.9M | _d<<=1; |
188 | 83.9M | l=OC_ILOGNZ_32(_d)-1; |
/*Binary + binds tighter than <<, so the dividend is 1<<(16+l).*/
189 | 83.9M | t=1+((ogg_uint32_t)1<<16+l)/_d; |
/*Only t-0x10000 is stored, converted to ogg_int16_t; the quantizer adds the
   value back in after the multiply (the "+v" term).*/
190 | 83.9M | _this->m=(ogg_int16_t)(t-0x10000); |
191 | 83.9M | _this->l=l; |
192 | 83.9M | } |
193 | | |
/*Builds a 64-entry quantization table from a 64-entry dequantization table
   by calling oc_iquant_init() once per coefficient.
  _enquant: The destination, treated as an array of 64 oc_iquant structures.
  _dequant: The 64 dequantizer values to derive the entries from.*/
194 | | void oc_enc_enquant_table_init_c(void *_enquant, |
195 | 0 | const ogg_uint16_t _dequant[64]){ |
196 | 0 | oc_iquant *enquant; |
197 | 0 | int zzi; |
198 | | /*In the original VP3.2 code, the rounding offset and the size of the |
199 | | dead zone around 0 were controlled by a "sharpness" parameter. |
200 | | We now R-D optimize the tokens for each block after quantization, |
201 | | so the rounding offset should always be 1/2, and an explicit dead |
202 | | zone is unnecessary. |
203 | | Hence, all of that VP3.2 code is gone from here, and the remaining |
204 | | floating point code has been implemented as equivalent integer |
205 | | code with exact precision.*/ |
206 | 0 | enquant=(oc_iquant *)_enquant; |
207 | 0 | for(zzi=0;zzi<64;zzi++)oc_iquant_init(enquant+zzi,_dequant[zzi]); |
208 | 0 | } |
209 | | |
/*For every plane pli and every qti, copies the first oc_iquant of the table
   _enquant[pli][0][qti] over the first oc_iquant of each table
   _enquant[pli][qii][qti] with 1<=qii<_nqis.
  Only that single structure is copied; the rest of each table is untouched.
  NOTE(review): presumably the first entry is the DC quantizer, which is being
   forced to match the qii==0 table -- confirm against the callers.
  _enquant: The tables to patch, each treated as an array of oc_iquant.
  _nqis:    The number of qii entries in use; nothing is done if it is 1 or
             less.*/
210 | 0 | void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis){ |
211 | 0 | int pli; |
212 | 0 | int qii; |
213 | 0 | int qti; |
214 | 0 | for(pli=0;pli<3;pli++)for(qii=1;qii<_nqis;qii++)for(qti=0;qti<2;qti++){ |
215 | 0 | *((oc_iquant *)_enquant[pli][qii][qti])= |
216 | 0 | *((oc_iquant *)_enquant[pli][0][qti]); |
217 | 0 | } |
218 | 0 | } |
219 | | |
/*Quantizes a block of 64 coefficients.
  _qdct:    Receives the 64 quantized values.
  _dct:     The 64 input coefficients.
  _dequant: The 64 dequantizer values, used for the threshold test and the
             rounding bias.
  _enquant: The matching table of 64 oc_iquant structures.
  Return: The largest index zzi for which abs(2*_dct[zzi])>=_dequant[zzi], or
   0 if there is none.
  A return value of 0 therefore does not distinguish "only index 0 passed the
   test" from "no index passed the test".*/
220 | | int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64], |
221 | 0 | const ogg_uint16_t _dequant[64],const void *_enquant){ |
222 | 0 | const oc_iquant *enquant; |
223 | 0 | int nonzero; |
224 | 0 | int zzi; |
225 | 0 | int val; |
226 | 0 | int d; |
227 | 0 | int s; |
228 | 0 | enquant=(const oc_iquant *)_enquant; |
229 | 0 | nonzero=0; |
230 | 0 | for(zzi=0;zzi<64;zzi++){ |
231 | 0 | val=_dct[zzi]; |
232 | 0 | d=_dequant[zzi]; |
/*Work with twice the coefficient, so the test below compares the
   coefficient's magnitude against d/2 without having to halve d.*/
233 | 0 | val=val<<1; |
234 | 0 | if(abs(val)>=d){ |
235 | 0 | s=OC_SIGNMASK(val); |
236 | | /*The bias added here rounds ties away from zero, since token |
237 | | optimization can only decrease the magnitude of the quantized |
238 | | value.*/ |
/*Binary + binds tighter than ^, so this adds (d+s)^s.
  NOTE(review): presumably OC_SIGNMASK() yields 0 for non-negative values and
   -1 for negative ones, making the bias +d or -d -- confirm against the macro
   definition.*/
239 | 0 | val+=d+s^s; |
240 | | /*Note the arithmetic right shift is not guaranteed by ANSI C. |
241 | | Hopefully no one still uses ones-complement architectures.*/ |
/*By operator precedence this is (((m*val)>>16)+val)>>l, minus s.*/
242 | 0 | val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; |
243 | 0 | _qdct[zzi]=(ogg_int16_t)val; |
244 | 0 | nonzero=zzi; |
245 | 0 | } |
246 | 0 | else _qdct[zzi]=0; |
247 | 0 | } |
248 | 0 | return nonzero; |
249 | 0 | } |
250 | | |
251 | | |
252 | | |
253 | | /*This table gives the square root of the fraction of the squared magnitude of |
254 | | each DCT coefficient relative to the total, scaled by 2**16, for both INTRA |
255 | | and INTER modes. |
256 | | These values were measured after motion-compensated prediction, before |
257 | | quantization, over a large set of test video (from QCIF to 1080p) encoded at |
258 | | all possible rates. |
259 | | The DC coefficient takes into account the DPCM prediction (using the |
260 | | quantized values from neighboring blocks, as the encoder does, but still |
261 | | before quantization of the coefficient in the current block). |
262 | | The results differ significantly from the expected variance (e.g., using an |
263 | | AR(1) model of the signal with rho=0.95, as is frequently done to compute |
264 | | the coding gain of the DCT). |
265 | | We use them to estimate an "average" quantizer for a given quantizer matrix, |
266 | | as this is used to parameterize a number of the rate control decisions. |
267 | | These values are themselves probably quantizer-matrix dependent, since the |
268 | | shape of the matrix affects the noise distribution in the reference frames, |
269 | | but they should at least give us _some_ amount of adaptivity to different |
270 | | matrices, as opposed to hard-coding a table of average Q values for the |
271 | | current set. |
272 | | The main features they capture are that a) only a few of the quantizers in |
273 | | the upper-left corner contribute anything significant at all (though INTER |
274 | | mode is significantly flatter) and b) the DPCM prediction of the DC |
275 | | coefficient gives a very minor improvement in the INTRA case and a quite |
276 | | significant one in the INTER case (over the expected variance).*/ |
/*Read as OC_RPSD[qti][ci] by oc_enquant_qavg_init(), which pairs entry ci
   with the dequantizer at index OC_IZIG_ZAG[ci].
  Each of the two sub-tables holds 64 values.*/
277 | | static const ogg_uint16_t OC_RPSD[2][64]={ |
278 | | { |
279 | | 52725,17370,10399, 6867, 5115, 3798, 2942, 2076, |
280 | | 17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619, |
281 | | 10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461, |
282 | | 6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243, |
283 | | 5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091, |
284 | | 3798, 2869, 2573, 2164, 1884, 1495, 1212, 873, |
285 | | 2942, 2229, 2015, 1718, 1530, 1212, 1001, 704, |
286 | | 2076, 1619, 1461, 1243, 1091, 873, 704, 474 |
287 | | }, |
288 | | { |
289 | | 23411,15604,13529,11601,10683, 8958, 7840, 6142, |
290 | | 15604,11901,10718, 9108, 8290, 6961, 6023, 4487, |
291 | | 13529,10718, 9961, 8527, 7945, 6689, 5742, 4333, |
292 | | 11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743, |
293 | | 10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504, |
294 | | 8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989, |
295 | | 7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558, |
296 | | 6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829 |
297 | | } |
298 | | }; |
299 | | |
300 | | /*The fraction of the squared magnitude of the residuals in each color channel |
301 | | relative to the total, scaled by 2**16, for each pixel format. |
302 | | These values were measured after motion-compensated prediction, before |
303 | | quantization, over a large set of test video encoded at all possible rates. |
304 | | TODO: These values are only from INTER frames; they should be re-measured for |
305 | | INTRA frames.*/ |
/*Read as OC_PCD[_pixel_fmt][pli] by oc_enquant_qavg_init(): one row per pixel
   format value (0...3) and one column per color plane (0...2).
  Rows 1 and 2 hold identical values.*/
306 | | static const ogg_uint16_t OC_PCD[4][3]={ |
307 | | {59926, 3038, 2572}, |
308 | | {55201, 5597, 4738}, |
309 | | {55201, 5597, 4738}, |
310 | | {47682, 9669, 8185} |
311 | | }; |
312 | | |
313 | | |
314 | | /*Compute "average" quantizers for each qi level to use for rate control. |
315 | | We do one for each color channel, as well as an average across color |
316 | | channels, separately for INTER and INTRA, since their behavior is very |
317 | | different. |
318 | | The basic approach is to compute a harmonic average of the squared quantizer, |
319 | | weighted by the expected squared magnitude of the DCT coefficients. |
320 | | Under the (not quite true) assumption that DCT coefficients are |
321 | | Laplacian-distributed, this preserves the product Q*lambda, where |
322 | | lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be |
323 | | confused with the lambda used in R-D optimization throughout most of the |
324 | | rest of the code), when the distributions from multiple coefficients are |
325 | | pooled. |
326 | | The value Q*lambda completely determines the entropy of coefficients drawn |
327 | | from a Laplacian distribution, and thus the expected bitrate.*/ |
/*Fills in the rate control tables for every qti (0...1) and qi (0...63).
  _log_qavg:        Receives one value per [qti][qi], computed from the
                     OC_PCD-weighted sum over all three planes.
  _log_plq:         Receives one value per [qi][pli][qti], computed from that
                     plane's sum alone.
  _chroma_rd_scale: Receives two values per [qti][qi]: index 0 is the clamped
                     chroma/luma ratio and index 1 is the clamped luma/chroma
                     ratio.
  _dequant:         The dequantization tables, read as
                     _dequant[qi][pli][qti][OC_IZIG_ZAG[ci]] for ci in 0...63.
                    NOTE(review): each value is used as a divisor, so all of
                     them must be nonzero.
  _pixel_fmt:       Selects the row of OC_PCD to use; that table has 4 rows.*/
328 | | void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], |
329 | | ogg_int16_t _log_plq[64][3][2],ogg_uint16_t _chroma_rd_scale[2][64][2], |
330 | 3.41k | ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){ |
331 | 3.41k | int qi; |
332 | 3.41k | int pli; |
333 | 3.41k | int qti; |
334 | 3.41k | int ci; |
335 | 444k | for(qti=0;qti<2;qti++)for(qi=0;qi<64;qi++){ |
336 | 437k | ogg_int64_t q2; |
337 | 437k | ogg_uint32_t qp[3]; |
338 | 437k | ogg_uint32_t cqp; |
339 | 437k | ogg_uint32_t d; |
340 | 437k | q2=0; |
341 | 1.74M | for(pli=0;pli<3;pli++){ |
/*qp[pli] accumulates the squares of OC_RPSD[qti][ci]/qd, with the division
   rounded to nearest by adding qd>>1 first.*/
342 | 1.31M | qp[pli]=0; |
343 | 85.2M | for(ci=0;ci<64;ci++){ |
344 | 83.9M | unsigned rq; |
345 | 83.9M | unsigned qd; |
346 | 83.9M | qd=_dequant[qi][pli][qti][OC_IZIG_ZAG[ci]]; |
347 | 83.9M | rq=(OC_RPSD[qti][ci]+(qd>>1))/qd; |
348 | 83.9M | qp[pli]+=rq*(ogg_uint32_t)rq; |
349 | 83.9M | } |
350 | 1.31M | q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp[pli]; |
351 | | /*plq=1.0/sqrt(qp)*/ |
/*Binary - binds tighter than >>, so the whole difference is halved.*/
352 | 1.31M | _log_plq[qi][pli][qti]= |
353 | 1.31M | (ogg_int16_t)(OC_Q10(32)-oc_blog32_q10(qp[pli])>>1); |
354 | 1.31M | } |
/*cqp is the average of qp[1] and qp[2] weighted by their OC_PCD entries,
   rounded to nearest.*/
355 | 437k | d=OC_PCD[_pixel_fmt][1]+OC_PCD[_pixel_fmt][2]; |
356 | 437k | cqp=(ogg_uint32_t)((OC_PCD[_pixel_fmt][1]*(ogg_int64_t)qp[1]+ |
357 | 437k | OC_PCD[_pixel_fmt][2]*(ogg_int64_t)qp[2]+(d>>1))/d); |
358 | | /*chroma_rd_scale=clamp(0.25,cqp/qp[0],4)*/ |
/*+ and - bind tighter than << and >>, so the divisor is
   (qp[0]+(1<<(OC_RD_SCALE_BITS-1)))>>OC_RD_SCALE_BITS and the lower clamp
   bound is 1<<(OC_RD_SCALE_BITS-2), provided the macro expands to a single
   token or a parenthesized expression.
  The divisor is forced to be at least 1 before it is used.*/
359 | 437k | d=OC_MAXI(qp[0]+(1<<OC_RD_SCALE_BITS-1)>>OC_RD_SCALE_BITS,1); |
360 | 437k | d=OC_CLAMPI(1<<OC_RD_SCALE_BITS-2,(cqp+(d>>1))/d,4<<OC_RD_SCALE_BITS); |
/*The value is converted to ogg_int16_t before being stored in the
   ogg_uint16_t destination.*/
361 | 437k | _chroma_rd_scale[qti][qi][0]=(ogg_int16_t)d; |
362 | | /*chroma_rd_iscale=clamp(0.25,qp[0]/cqp,4)*/ |
363 | 437k | d=OC_MAXI(OC_RD_ISCALE(cqp,1),1); |
364 | 437k | d=OC_CLAMPI(1<<OC_RD_ISCALE_BITS-2,(qp[0]+(d>>1))/d,4<<OC_RD_ISCALE_BITS); |
365 | 437k | _chroma_rd_scale[qti][qi][1]=(ogg_int16_t)d; |
366 | | /*qavg=1.0/sqrt(q2).*/ |
/*As above, the subtraction is performed before the shift.*/
367 | 437k | _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; |
368 | 437k | } |
369 | 3.41k | } |