g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
30 #include "avcodec.h"
31 #include "g722.h"
32 
33 #define FREEZE_INTERVAL 128
34 
35 /* This is an arbitrary value. Allowing insanely large values leads to strange
36  problems, so we limit it to a reasonable value */
37 #define MAX_FRAME_SIZE 32768
38 
39 /* We clip the value of avctx->trellis to prevent data type overflows and
40  undefined behavior. Using larger values is insanely slow anyway. */
41 #define MIN_TRELLIS 0
42 #define MAX_TRELLIS 16
43 
45 {
46  G722Context *c = avctx->priv_data;
47 
48  if (avctx->channels != 1) {
49  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
50  return AVERROR_INVALIDDATA;
51  }
52 
53  c->band[0].scale_factor = 8;
54  c->band[1].scale_factor = 2;
55  c->prev_samples_pos = 22;
56 
57  if (avctx->trellis) {
58  int frontier = 1 << avctx->trellis;
59  int max_paths = frontier * FREEZE_INTERVAL;
60  int i;
61  for (i = 0; i < 2; i++) {
62  c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
63  c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
64  c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
65  }
66  }
67 
68  if (avctx->frame_size) {
69  /* validate frame size */
70  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
71  int new_frame_size;
72 
73  if (avctx->frame_size == 1)
74  new_frame_size = 2;
75  else if (avctx->frame_size > MAX_FRAME_SIZE)
76  new_frame_size = MAX_FRAME_SIZE;
77  else
78  new_frame_size = avctx->frame_size - 1;
79 
80  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
81  "allowed. Using %d instead of %d\n", new_frame_size,
82  avctx->frame_size);
83  avctx->frame_size = new_frame_size;
84  }
85  } else {
86  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
87  a common packet size for VoIP applications */
88  avctx->frame_size = 320;
89  }
90 
91  if (avctx->trellis) {
92  /* validate trellis */
93  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
94  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
95  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
96  "allowed. Using %d instead of %d\n", new_trellis,
97  avctx->trellis);
98  avctx->trellis = new_trellis;
99  }
100  }
101 
102  return 0;
103 }
104 
106 {
107  G722Context *c = avctx->priv_data;
108  int i;
109  for (i = 0; i < 2; i++) {
110  av_freep(&c->paths[i]);
111  av_freep(&c->node_buf[i]);
112  av_freep(&c->nodep_buf[i]);
113  }
114  return 0;
115 }
116 
117 static const int16_t low_quant[33] = {
118  35, 72, 110, 150, 190, 233, 276, 323,
119  370, 422, 473, 530, 587, 650, 714, 786,
120  858, 940, 1023, 1121, 1219, 1339, 1458, 1612,
121  1765, 1980, 2195, 2557, 2919
122 };
123 
124 static inline void filter_samples(G722Context *c, const int16_t *samples,
125  int *xlow, int *xhigh)
126 {
127  int xout1, xout2;
128  c->prev_samples[c->prev_samples_pos++] = samples[0];
129  c->prev_samples[c->prev_samples_pos++] = samples[1];
130  ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
131  *xlow = xout1 + xout2 >> 14;
132  *xhigh = xout1 - xout2 >> 14;
134  memmove(c->prev_samples,
135  c->prev_samples + c->prev_samples_pos - 22,
136  22 * sizeof(c->prev_samples[0]));
137  c->prev_samples_pos = 22;
138  }
139 }
140 
141 static inline int encode_high(const struct G722Band *state, int xhigh)
142 {
143  int diff = av_clip_int16(xhigh - state->s_predictor);
144  int pred = 141 * state->scale_factor >> 8;
145  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
146  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
147 }
148 
149 static inline int encode_low(const struct G722Band* state, int xlow)
150 {
151  int diff = av_clip_int16(xlow - state->s_predictor);
152  /* = diff >= 0 ? diff : -(diff + 1) */
153  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
154  int i = 0;
155  limit = limit + 1 << 10;
156  if (limit > low_quant[8] * state->scale_factor)
157  i = 9;
158  while (i < 29 && limit > low_quant[i] * state->scale_factor)
159  i++;
160  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
161 }
162 
163 static void g722_encode_trellis(G722Context *c, int trellis,
164  uint8_t *dst, int nb_samples,
165  const int16_t *samples)
166 {
167  int i, j, k;
168  int frontier = 1 << trellis;
169  struct TrellisNode **nodes[2];
170  struct TrellisNode **nodes_next[2];
171  int pathn[2] = {0, 0}, froze = -1;
172  struct TrellisPath *p[2];
173 
174  for (i = 0; i < 2; i++) {
175  nodes[i] = c->nodep_buf[i];
176  nodes_next[i] = c->nodep_buf[i] + frontier;
177  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
178  nodes[i][0] = c->node_buf[i] + frontier;
179  nodes[i][0]->ssd = 0;
180  nodes[i][0]->path = 0;
181  nodes[i][0]->state = c->band[i];
182  }
183 
184  for (i = 0; i < nb_samples >> 1; i++) {
185  int xlow, xhigh;
186  struct TrellisNode *next[2];
187  int heap_pos[2] = {0, 0};
188 
189  for (j = 0; j < 2; j++) {
190  next[j] = c->node_buf[j] + frontier*(i & 1);
191  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
192  }
193 
194  filter_samples(c, &samples[2*i], &xlow, &xhigh);
195 
196  for (j = 0; j < frontier && nodes[0][j]; j++) {
197  /* Only k >> 2 affects the future adaptive state, therefore testing
198  * small steps that don't change k >> 2 is useless, the original
199  * value from encode_low is better than them. Since we step k
200  * in steps of 4, make sure range is a multiple of 4, so that
201  * we don't miss the original value from encode_low. */
202  int range = j < frontier/2 ? 4 : 0;
203  struct TrellisNode *cur_node = nodes[0][j];
204 
205  int ilow = encode_low(&cur_node->state, xlow);
206 
207  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
208  int decoded, dec_diff, pos;
209  uint32_t ssd;
210  struct TrellisNode* node;
211 
212  if (k < 0)
213  continue;
214 
215  decoded = av_clip((cur_node->state.scale_factor *
216  ff_g722_low_inv_quant6[k] >> 10)
217  + cur_node->state.s_predictor, -16384, 16383);
218  dec_diff = xlow - decoded;
219 
220 #define STORE_NODE(index, UPDATE, VALUE)\
221  ssd = cur_node->ssd + dec_diff*dec_diff;\
222  /* Check for wraparound. Using 64 bit ssd counters would \
223  * be simpler, but is slower on x86 32 bit. */\
224  if (ssd < cur_node->ssd)\
225  continue;\
226  if (heap_pos[index] < frontier) {\
227  pos = heap_pos[index]++;\
228  assert(pathn[index] < FREEZE_INTERVAL * frontier);\
229  node = nodes_next[index][pos] = next[index]++;\
230  node->path = pathn[index]++;\
231  } else {\
232  /* Try to replace one of the leaf nodes with the new \
233  * one, but not always testing the same leaf position */\
234  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
235  if (ssd >= nodes_next[index][pos]->ssd)\
236  continue;\
237  heap_pos[index]++;\
238  node = nodes_next[index][pos];\
239  }\
240  node->ssd = ssd;\
241  node->state = cur_node->state;\
242  UPDATE;\
243  c->paths[index][node->path].value = VALUE;\
244  c->paths[index][node->path].prev = cur_node->path;\
245  /* Sift the newly inserted node up in the heap to restore \
246  * the heap property */\
247  while (pos > 0) {\
248  int parent = (pos - 1) >> 1;\
249  if (nodes_next[index][parent]->ssd <= ssd)\
250  break;\
251  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
252  nodes_next[index][pos]);\
253  pos = parent;\
254  }
255  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
256  }
257  }
258 
259  for (j = 0; j < frontier && nodes[1][j]; j++) {
260  int ihigh;
261  struct TrellisNode *cur_node = nodes[1][j];
262 
263  /* We don't try to get any initial guess for ihigh via
264  * encode_high - since there's only 4 possible values, test
265  * them all. Testing all of these gives a much, much larger
266  * gain than testing a larger range around ilow. */
267  for (ihigh = 0; ihigh < 4; ihigh++) {
268  int dhigh, decoded, dec_diff, pos;
269  uint32_t ssd;
270  struct TrellisNode* node;
271 
272  dhigh = cur_node->state.scale_factor *
273  ff_g722_high_inv_quant[ihigh] >> 10;
274  decoded = av_clip(dhigh + cur_node->state.s_predictor,
275  -16384, 16383);
276  dec_diff = xhigh - decoded;
277 
278  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
279  }
280  }
281 
282  for (j = 0; j < 2; j++) {
283  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
284 
285  if (nodes[j][0]->ssd > (1 << 16)) {
286  for (k = 1; k < frontier && nodes[j][k]; k++)
287  nodes[j][k]->ssd -= nodes[j][0]->ssd;
288  nodes[j][0]->ssd = 0;
289  }
290  }
291 
292  if (i == froze + FREEZE_INTERVAL) {
293  p[0] = &c->paths[0][nodes[0][0]->path];
294  p[1] = &c->paths[1][nodes[1][0]->path];
295  for (j = i; j > froze; j--) {
296  dst[j] = p[1]->value << 6 | p[0]->value;
297  p[0] = &c->paths[0][p[0]->prev];
298  p[1] = &c->paths[1][p[1]->prev];
299  }
300  froze = i;
301  pathn[0] = pathn[1] = 0;
302  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
303  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
304  }
305  }
306 
307  p[0] = &c->paths[0][nodes[0][0]->path];
308  p[1] = &c->paths[1][nodes[1][0]->path];
309  for (j = i; j > froze; j--) {
310  dst[j] = p[1]->value << 6 | p[0]->value;
311  p[0] = &c->paths[0][p[0]->prev];
312  p[1] = &c->paths[1][p[1]->prev];
313  }
314  c->band[0] = nodes[0][0]->state;
315  c->band[1] = nodes[1][0]->state;
316 }
317 
318 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
319  const int16_t *samples)
320 {
321  int xlow, xhigh, ilow, ihigh;
322  filter_samples(c, samples, &xlow, &xhigh);
323  ihigh = encode_high(&c->band[1], xhigh);
324  ilow = encode_low (&c->band[0], xlow);
326  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
327  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
328  *dst = ihigh << 6 | ilow;
329 }
330 
331 static void g722_encode_no_trellis(G722Context *c,
332  uint8_t *dst, int nb_samples,
333  const int16_t *samples)
334 {
335  int i;
336  for (i = 0; i < nb_samples; i += 2)
337  encode_byte(c, dst++, &samples[i]);
338 }
339 
340 static int g722_encode_frame(AVCodecContext *avctx,
341  uint8_t *dst, int buf_size, void *data)
342 {
343  G722Context *c = avctx->priv_data;
344  const int16_t *samples = data;
345  int nb_samples;
346 
347  nb_samples = avctx->frame_size - (avctx->frame_size & 1);
348 
349  if (avctx->trellis)
350  g722_encode_trellis(c, avctx->trellis, dst, nb_samples, samples);
351  else
352  g722_encode_no_trellis(c, dst, nb_samples, samples);
353 
354  /* handle last frame with odd frame_size */
355  if (nb_samples < avctx->frame_size) {
356  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
357  encode_byte(c, &dst[nb_samples >> 1], last_samples);
358  }
359 
360  return (avctx->frame_size + 1) >> 1;
361 }
362 
364  .name = "g722",
365  .type = AVMEDIA_TYPE_AUDIO,
366  .id = CODEC_ID_ADPCM_G722,
367  .priv_data_size = sizeof(G722Context),
370  .encode = g722_encode_frame,
371  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
372  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
373  .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
374 };