/src/ffmpeg/libavcodec/g722enc.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) CMU 1993 Computer Science, Speech Group |
3 | | * Chengxiang Lu and Alex Hauptmann |
4 | | * Copyright (c) 2005 Steve Underwood <steveu at coppice.org> |
5 | | * Copyright (c) 2009 Kenan Gillet |
6 | | * Copyright (c) 2010 Martin Storsjo |
7 | | * |
8 | | * This file is part of FFmpeg. |
9 | | * |
10 | | * FFmpeg is free software; you can redistribute it and/or |
11 | | * modify it under the terms of the GNU Lesser General Public |
12 | | * License as published by the Free Software Foundation; either |
13 | | * version 2.1 of the License, or (at your option) any later version. |
14 | | * |
15 | | * FFmpeg is distributed in the hope that it will be useful, |
16 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | | * Lesser General Public License for more details. |
19 | | * |
20 | | * You should have received a copy of the GNU Lesser General Public |
21 | | * License along with FFmpeg; if not, write to the Free Software |
22 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23 | | */ |
24 | | |
25 | | /** |
26 | | * @file |
27 | | * G.722 ADPCM audio encoder |
28 | | */ |
29 | | |
30 | | #include "libavutil/avassert.h" |
31 | | #include "libavutil/channel_layout.h" |
32 | | #include "libavutil/mem.h" |
33 | | #include "avcodec.h" |
34 | | #include "codec_internal.h" |
35 | | #include "encode.h" |
36 | | #include "g722.h" |
37 | | #include "libavutil/common.h" |
38 | | |
39 | 0 | #define FREEZE_INTERVAL 128 |
40 | | |
41 | | /* This is an arbitrary value. Allowing insanely large values leads to strange |
42 | | problems, so we limit it to a reasonable value */ |
43 | 0 | #define MAX_FRAME_SIZE 32768 |
44 | | |
45 | | /* We clip the value of avctx->trellis to prevent data type overflows and |
46 | | undefined behavior. Using larger values is insanely slow anyway. */ |
47 | 0 | #define MIN_TRELLIS 0 |
48 | 0 | #define MAX_TRELLIS 16 |
49 | | |
50 | | static av_cold int g722_encode_close(AVCodecContext *avctx) |
51 | 0 | { |
52 | 0 | G722Context *c = avctx->priv_data; |
53 | 0 | int i; |
54 | 0 | for (i = 0; i < 2; i++) { |
55 | 0 | av_freep(&c->paths[i]); |
56 | 0 | av_freep(&c->node_buf[i]); |
57 | 0 | av_freep(&c->nodep_buf[i]); |
58 | 0 | } |
59 | 0 | return 0; |
60 | 0 | } |
61 | | |
62 | | static av_cold int g722_encode_init(AVCodecContext * avctx) |
63 | 0 | { |
64 | 0 | G722Context *c = avctx->priv_data; |
65 | |
66 | 0 | c->band[0].scale_factor = 8; |
67 | 0 | c->band[1].scale_factor = 2; |
68 | 0 | c->prev_samples_pos = 22; |
69 | |
70 | 0 | if (avctx->frame_size) { |
71 | | /* validate frame size */ |
72 | 0 | if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) { |
73 | 0 | int new_frame_size; |
74 | |
75 | 0 | if (avctx->frame_size == 1) |
76 | 0 | new_frame_size = 2; |
77 | 0 | else if (avctx->frame_size > MAX_FRAME_SIZE) |
78 | 0 | new_frame_size = MAX_FRAME_SIZE; |
79 | 0 | else |
80 | 0 | new_frame_size = avctx->frame_size - 1; |
81 | |
82 | 0 | av_log(avctx, AV_LOG_WARNING, "Requested frame size is not " |
83 | 0 | "allowed. Using %d instead of %d\n", new_frame_size, |
84 | 0 | avctx->frame_size); |
85 | 0 | avctx->frame_size = new_frame_size; |
86 | 0 | } |
87 | 0 | } else { |
88 | | /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is |
89 | | a common packet size for VoIP applications */ |
90 | 0 | avctx->frame_size = 320; |
91 | 0 | } |
92 | 0 | avctx->initial_padding = 22; |
93 | |
94 | 0 | if (avctx->trellis) { |
95 | | /* validate trellis */ |
96 | 0 | if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) { |
97 | 0 | int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS); |
98 | 0 | av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not " |
99 | 0 | "allowed. Using %d instead of %d\n", new_trellis, |
100 | 0 | avctx->trellis); |
101 | 0 | avctx->trellis = new_trellis; |
102 | 0 | } |
103 | 0 | if (avctx->trellis) { |
104 | 0 | int frontier = 1 << avctx->trellis; |
105 | 0 | int max_paths = frontier * FREEZE_INTERVAL; |
106 | |
107 | 0 | for (int i = 0; i < 2; i++) { |
108 | 0 | c->paths[i] = av_calloc(max_paths, sizeof(**c->paths)); |
109 | 0 | c->node_buf[i] = av_calloc(frontier, 2 * sizeof(**c->node_buf)); |
110 | 0 | c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf)); |
111 | 0 | if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) |
112 | 0 | return AVERROR(ENOMEM); |
113 | 0 | } |
114 | 0 | } |
115 | 0 | } |
116 | | |
117 | 0 | ff_g722dsp_init(&c->dsp); |
118 | |
119 | 0 | return 0; |
120 | 0 | } |
121 | | |
122 | | static const int16_t low_quant[33] = { |
123 | | 35, 72, 110, 150, 190, 233, 276, 323, |
124 | | 370, 422, 473, 530, 587, 650, 714, 786, |
125 | | 858, 940, 1023, 1121, 1219, 1339, 1458, 1612, |
126 | | 1765, 1980, 2195, 2557, 2919 |
127 | | }; |
128 | | |
129 | | static inline void filter_samples(G722Context *c, const int16_t *samples, |
130 | | int *xlow, int *xhigh) |
131 | 0 | { |
132 | 0 | int xout[2]; |
133 | 0 | c->prev_samples[c->prev_samples_pos++] = samples[0]; |
134 | 0 | c->prev_samples[c->prev_samples_pos++] = samples[1]; |
135 | 0 | c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout); |
136 | 0 | *xlow = xout[0] + xout[1] >> 14; |
137 | 0 | *xhigh = xout[0] - xout[1] >> 14; |
138 | 0 | if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) { |
139 | 0 | memmove(c->prev_samples, |
140 | 0 | c->prev_samples + c->prev_samples_pos - 22, |
141 | 0 | 22 * sizeof(c->prev_samples[0])); |
142 | 0 | c->prev_samples_pos = 22; |
143 | 0 | } |
144 | 0 | } |
145 | | |
146 | | static inline int encode_high(const struct G722Band *state, int xhigh) |
147 | 0 | { |
148 | 0 | int diff = av_clip_int16(xhigh - state->s_predictor); |
149 | 0 | int pred = 141 * state->scale_factor >> 8; |
150 | | /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */ |
151 | 0 | return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0); |
152 | 0 | } |
153 | | |
154 | | static inline int encode_low(const struct G722Band* state, int xlow) |
155 | 0 | { |
156 | 0 | int diff = av_clip_int16(xlow - state->s_predictor); |
157 | | /* = diff >= 0 ? diff : -(diff + 1) */ |
158 | 0 | int limit = diff ^ (diff >> (sizeof(diff)*8-1)); |
159 | 0 | int i = 0; |
160 | 0 | limit = limit + 1 << 10; |
161 | 0 | if (limit > low_quant[8] * state->scale_factor) |
162 | 0 | i = 9; |
163 | 0 | while (i < 29 && limit > low_quant[i] * state->scale_factor) |
164 | 0 | i++; |
165 | 0 | return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i; |
166 | 0 | } |
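
Aside (not part of g722enc.c): encode_high() and encode_low() above take the magnitude of diff without a branch. For a 32-bit int, diff >> 31 is all zeros when diff is non-negative and all ones when it is negative, so diff ^ (diff >> 31) yields diff or -(diff + 1) respectively, matching the inline comments. A minimal standalone sketch of that identity, using a hypothetical helper name (like the encoder itself, it relies on arithmetic right shift of negative values):

#include <assert.h>

/* Hypothetical helper mirroring the idiom in encode_high()/encode_low(). */
static int magnitude_like_g722(int diff)
{
    /* diff >> 31 is 0 for diff >= 0 and -1 (all ones) for diff < 0, so the
       XOR returns diff or ~diff == -(diff + 1) without branching. */
    return diff ^ (diff >> (sizeof(diff) * 8 - 1));
}

int main(void)
{
    assert(magnitude_like_g722( 5) == 5);  /* non-negative: unchanged        */
    assert(magnitude_like_g722(-5) == 4);  /* negative: -(diff + 1), not abs */
    assert(magnitude_like_g722( 0) == 0);
    return 0;
}
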
167 | | |
168 | | static void g722_encode_trellis(G722Context *c, int trellis, |
169 | | uint8_t *dst, int nb_samples, |
170 | | const int16_t *samples) |
171 | 0 | { |
172 | 0 | int i, j, k; |
173 | 0 | int frontier = 1 << trellis; |
174 | 0 | struct TrellisNode **nodes[2]; |
175 | 0 | struct TrellisNode **nodes_next[2]; |
176 | 0 | int pathn[2] = {0, 0}, froze = -1; |
177 | 0 | struct TrellisPath *p[2]; |
178 | |
179 | 0 | for (i = 0; i < 2; i++) { |
180 | 0 | nodes[i] = c->nodep_buf[i]; |
181 | 0 | nodes_next[i] = c->nodep_buf[i] + frontier; |
182 | 0 | memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i])); |
183 | 0 | nodes[i][0] = c->node_buf[i] + frontier; |
184 | 0 | nodes[i][0]->ssd = 0; |
185 | 0 | nodes[i][0]->path = 0; |
186 | 0 | nodes[i][0]->state = c->band[i]; |
187 | 0 | } |
188 | |
189 | 0 | for (i = 0; i < nb_samples >> 1; i++) { |
190 | 0 | int xlow, xhigh; |
191 | 0 | struct TrellisNode *next[2]; |
192 | 0 | int heap_pos[2] = {0, 0}; |
193 | |
194 | 0 | for (j = 0; j < 2; j++) { |
195 | 0 | next[j] = c->node_buf[j] + frontier*(i & 1); |
196 | 0 | memset(nodes_next[j], 0, frontier * sizeof(**nodes_next)); |
197 | 0 | } |
198 | |
199 | 0 | filter_samples(c, &samples[2*i], &xlow, &xhigh); |
200 | |
201 | 0 | for (j = 0; j < frontier && nodes[0][j]; j++) { |
202 | | /* Only k >> 2 affects the future adaptive state, therefore testing |
203 | | * small steps that don't change k >> 2 is useless, the original |
204 | | * value from encode_low is better than them. Since we step k |
205 | | * in steps of 4, make sure range is a multiple of 4, so that |
206 | | * we don't miss the original value from encode_low. */ |
207 | 0 | int range = j < frontier/2 ? 4 : 0; |
208 | 0 | struct TrellisNode *cur_node = nodes[0][j]; |
209 | |
210 | 0 | int ilow = encode_low(&cur_node->state, xlow); |
211 | |
212 | 0 | for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) { |
213 | 0 | int decoded, dec_diff, pos; |
214 | 0 | uint32_t ssd; |
215 | 0 | struct TrellisNode* node; |
216 | |
217 | 0 | if (k < 0) |
218 | 0 | continue; |
219 | | |
220 | 0 | decoded = av_clip_intp2((cur_node->state.scale_factor * |
221 | 0 | ff_g722_low_inv_quant6[k] >> 10) |
222 | 0 | + cur_node->state.s_predictor, 14); |
223 | 0 | dec_diff = xlow - decoded; |
224 | |
225 | 0 | #define STORE_NODE(index, UPDATE, VALUE)\ |
226 | 0 | ssd = cur_node->ssd + dec_diff*dec_diff;\ |
227 | | /* Check for wraparound. Using 64 bit ssd counters would \ |
228 | | * be simpler, but is slower on x86 32 bit. */\ |
229 | 0 | if (ssd < cur_node->ssd)\ |
230 | 0 | continue;\ |
231 | 0 | if (heap_pos[index] < frontier) {\ |
232 | 0 | pos = heap_pos[index]++;\ |
233 | 0 | av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\ |
234 | 0 | node = nodes_next[index][pos] = next[index]++;\ |
235 | 0 | node->path = pathn[index]++;\ |
236 | 0 | } else {\ |
237 | | /* Try to replace one of the leaf nodes with the new \ |
238 | | * one, but not always testing the same leaf position */\ |
239 | 0 | pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\ |
240 | 0 | if (ssd >= nodes_next[index][pos]->ssd)\ |
241 | 0 | continue;\ |
242 | 0 | heap_pos[index]++;\ |
243 | 0 | node = nodes_next[index][pos];\ |
244 | 0 | }\ |
245 | 0 | node->ssd = ssd;\ |
246 | 0 | node->state = cur_node->state;\ |
247 | 0 | UPDATE;\ |
248 | 0 | c->paths[index][node->path].value = VALUE;\ |
249 | 0 | c->paths[index][node->path].prev = cur_node->path;\ |
250 | | /* Sift the newly inserted node up in the heap to restore \ |
251 | | * the heap property */\ |
252 | 0 | while (pos > 0) {\ |
253 | 0 | int parent = (pos - 1) >> 1;\ |
254 | 0 | if (nodes_next[index][parent]->ssd <= ssd)\ |
255 | 0 | break;\ |
256 | 0 | FFSWAP(struct TrellisNode*, nodes_next[index][parent],\ |
257 | 0 | nodes_next[index][pos]);\ |
258 | 0 | pos = parent;\ |
259 | 0 | } |
260 | 0 | STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k); |
261 | 0 | } |
262 | 0 | } |
263 | |
264 | 0 | for (j = 0; j < frontier && nodes[1][j]; j++) { |
265 | 0 | int ihigh; |
266 | 0 | struct TrellisNode *cur_node = nodes[1][j]; |
267 | | |
268 | | /* We don't try to get any initial guess for ihigh via |
269 | | * encode_high - since there's only 4 possible values, test |
270 | | * them all. Testing all of these gives a much, much larger |
271 | | * gain than testing a larger range around ilow. */ |
272 | 0 | for (ihigh = 0; ihigh < 4; ihigh++) { |
273 | 0 | int dhigh, decoded, dec_diff, pos; |
274 | 0 | uint32_t ssd; |
275 | 0 | struct TrellisNode* node; |
276 | |
277 | 0 | dhigh = cur_node->state.scale_factor * |
278 | 0 | ff_g722_high_inv_quant[ihigh] >> 10; |
279 | 0 | decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14); |
280 | 0 | dec_diff = xhigh - decoded; |
281 | |
282 | 0 | STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh); |
283 | 0 | } |
284 | 0 | } |
285 | |
286 | 0 | for (j = 0; j < 2; j++) { |
287 | 0 | FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]); |
288 | |
289 | 0 | if (nodes[j][0]->ssd > (1 << 16)) { |
290 | 0 | for (k = 1; k < frontier && nodes[j][k]; k++) |
291 | 0 | nodes[j][k]->ssd -= nodes[j][0]->ssd; |
292 | 0 | nodes[j][0]->ssd = 0; |
293 | 0 | } |
294 | 0 | } |
295 | |
296 | 0 | if (i == froze + FREEZE_INTERVAL) { |
297 | 0 | p[0] = &c->paths[0][nodes[0][0]->path]; |
298 | 0 | p[1] = &c->paths[1][nodes[1][0]->path]; |
299 | 0 | for (j = i; j > froze; j--) { |
300 | 0 | dst[j] = p[1]->value << 6 | p[0]->value; |
301 | 0 | p[0] = &c->paths[0][p[0]->prev]; |
302 | 0 | p[1] = &c->paths[1][p[1]->prev]; |
303 | 0 | } |
304 | 0 | froze = i; |
305 | 0 | pathn[0] = pathn[1] = 0; |
306 | 0 | memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes)); |
307 | 0 | memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes)); |
308 | 0 | } |
309 | 0 | } |
310 | |
311 | 0 | p[0] = &c->paths[0][nodes[0][0]->path]; |
312 | 0 | p[1] = &c->paths[1][nodes[1][0]->path]; |
313 | 0 | for (j = i; j > froze; j--) { |
314 | 0 | dst[j] = p[1]->value << 6 | p[0]->value; |
315 | 0 | p[0] = &c->paths[0][p[0]->prev]; |
316 | 0 | p[1] = &c->paths[1][p[1]->prev]; |
317 | 0 | } |
318 | 0 | c->band[0] = nodes[0][0]->state; |
319 | 0 | c->band[1] = nodes[1][0]->state; |
320 | 0 | } |
321 | | |
322 | | static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, |
323 | | const int16_t *samples) |
324 | 0 | { |
325 | 0 | int xlow, xhigh, ilow, ihigh; |
326 | 0 | filter_samples(c, samples, &xlow, &xhigh); |
327 | 0 | ihigh = encode_high(&c->band[1], xhigh); |
328 | 0 | ilow = encode_low (&c->band[0], xlow); |
329 | 0 | ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor * |
330 | 0 | ff_g722_high_inv_quant[ihigh] >> 10, ihigh); |
331 | 0 | ff_g722_update_low_predictor(&c->band[0], ilow >> 2); |
332 | 0 | *dst = ihigh << 6 | ilow; |
333 | 0 | } |
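
Aside (not part of g722enc.c): encode_byte() above emits one output byte per pair of 16 kHz input samples, with the 2-bit high-band index in bits 7..6 and the 6-bit low-band index in bits 5..0. A small sketch of that byte layout, using hypothetical helper names:

#include <stdint.h>

/* Hypothetical helpers illustrating the byte layout written by encode_byte():
   bits 7..6 = high-band index (2 bits), bits 5..0 = low-band index (6 bits). */
static inline uint8_t g722_pack(int ihigh, int ilow)
{
    return (uint8_t)((ihigh << 6) | (ilow & 0x3f));
}

static inline void g722_unpack(uint8_t code, int *ihigh, int *ilow)
{
    *ihigh = code >> 6;
    *ilow  = code & 0x3f;
}
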
334 | | |
335 | | static void g722_encode_no_trellis(G722Context *c, |
336 | | uint8_t *dst, int nb_samples, |
337 | | const int16_t *samples) |
338 | 0 | { |
339 | 0 | int i; |
340 | 0 | for (i = 0; i < nb_samples; i += 2) |
341 | 0 | encode_byte(c, dst++, &samples[i]); |
342 | 0 | } |
343 | | |
344 | | static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
345 | | const AVFrame *frame, int *got_packet_ptr) |
346 | 0 | { |
347 | 0 | G722Context *c = avctx->priv_data; |
348 | 0 | const int16_t *samples = (const int16_t *)frame->data[0]; |
349 | 0 | int nb_samples, out_size, ret; |
350 | |
351 | 0 | out_size = (frame->nb_samples + 1) / 2; |
352 | 0 | if ((ret = ff_get_encode_buffer(avctx, avpkt, out_size, 0)) < 0) |
353 | 0 | return ret; |
354 | | |
355 | 0 | nb_samples = frame->nb_samples - (frame->nb_samples & 1); |
356 | |
357 | 0 | if (avctx->trellis) |
358 | 0 | g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples); |
359 | 0 | else |
360 | 0 | g722_encode_no_trellis(c, avpkt->data, nb_samples, samples); |
361 | |
363 | 0 | if (nb_samples < frame->nb_samples) { |
364 | 0 | int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] }; |
365 | 0 | encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples); |
366 | 0 | } |
367 | |
368 | 0 | if (frame->pts != AV_NOPTS_VALUE) |
369 | 0 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); |
370 | 0 | *got_packet_ptr = 1; |
371 | 0 | return 0; |
372 | 0 | } |
373 | | |
374 | | const FFCodec ff_adpcm_g722_encoder = { |
375 | | .p.name = "g722", |
376 | | CODEC_LONG_NAME("G.722 ADPCM"), |
377 | | .p.type = AVMEDIA_TYPE_AUDIO, |
378 | | .p.id = AV_CODEC_ID_ADPCM_G722, |
379 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME | |
380 | | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, |
381 | | .priv_data_size = sizeof(G722Context), |
382 | | .init = g722_encode_init, |
383 | | .close = g722_encode_close, |
384 | | FF_CODEC_ENCODE_CB(g722_encode_frame), |
385 | | CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_S16), |
386 | | CODEC_CH_LAYOUTS(AV_CHANNEL_LAYOUT_MONO), |
387 | | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, |
388 | | }; |
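
Aside (not part of g722enc.c): a minimal sketch of driving this encoder through the public libavcodec send/receive API, assuming a 16 kHz mono s16 input buffer. The function name and error handling are illustrative only; the avcodec_* and av_* calls are the standard API entry points used with any audio encoder.

#include <stdint.h>
#include <string.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>

/* Sketch: encode one frame of 16 kHz mono s16 PCM with the "g722" encoder.
 * "pcm" must hold at least ctx->frame_size samples (320 by default, 20 ms).
 * Returns the number of G.722 bytes written to "out", or a negative error. */
int encode_one_g722_frame(const int16_t *pcm, uint8_t *out, int out_size)
{
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_ADPCM_G722);
    AVCodecContext *ctx  = avcodec_alloc_context3(codec);
    AVFrame  *frame = av_frame_alloc();
    AVPacket *pkt   = av_packet_alloc();
    int ret = AVERROR(ENOMEM), written = 0;

    if (!codec || !ctx || !frame || !pkt)
        goto end;

    ctx->sample_rate = 16000;                 /* G.722 takes 16 kHz input     */
    ctx->sample_fmt  = AV_SAMPLE_FMT_S16;
    ctx->ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
    /* ctx->trellis  = 8;                        optionally enable the trellis */

    if ((ret = avcodec_open2(ctx, codec, NULL)) < 0)
        goto end;

    frame->nb_samples = ctx->frame_size;      /* 320 unless overridden above   */
    frame->format     = ctx->sample_fmt;
    av_channel_layout_copy(&frame->ch_layout, &ctx->ch_layout);
    if ((ret = av_frame_get_buffer(frame, 0)) < 0)
        goto end;
    memcpy(frame->data[0], pcm, frame->nb_samples * sizeof(int16_t));

    if ((ret = avcodec_send_frame(ctx, frame)) < 0 ||
        (ret = avcodec_receive_packet(ctx, pkt)) < 0)
        goto end;

    written = pkt->size < out_size ? pkt->size : out_size;
    memcpy(out, pkt->data, written);          /* one byte per two input samples */
    ret = 0;

end:
    av_packet_free(&pkt);
    av_frame_free(&frame);
    avcodec_free_context(&ctx);
    return ret < 0 ? ret : written;
}
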