alacenc.c
Go to the documentation of this file.
1 
22 #include "avcodec.h"
23 #include "put_bits.h"
24 #include "dsputil.h"
25 #include "lpc.h"
26 #include "mathops.h"
27 
/* Frame geometry and container sizes.
 * DEFAULT_FRAME_SIZE is the upper bound alac_encode_frame() accepts for
 * avctx->frame_size; ALAC_EXTRADATA_SIZE is the byte size of the extradata
 * blob built in the init function (it is also stored in the blob's first
 * 32-bit field). */
28 #define DEFAULT_FRAME_SIZE 4096
29 #define DEFAULT_SAMPLE_SIZE 16
30 #define MAX_CHANNELS 8
31 #define ALAC_EXTRADATA_SIZE 36
32 #define ALAC_FRAME_HEADER_SIZE 55
33 #define ALAC_FRAME_FOOTER_SIZE 3
34 
/* Entropy-coder escape value (0x1FF is nine consecutive 1 bits) and
 * linear-prediction limits/defaults. */
35 #define ALAC_ESCAPE_CODE 0x1FF
36 #define ALAC_MAX_LPC_ORDER 30
37 #define DEFAULT_MAX_PRED_ORDER 6
38 #define DEFAULT_MIN_PRED_ORDER 4
39 #define ALAC_MAX_LPC_PRECISION 9
40 #define ALAC_MAX_LPC_SHIFT 9
41 
/* Stereo channel-mode identifiers.
 * NOTE(review): the values line up with the score indices used in
 * estimate_stereo_mode() (0 = left+right, 1 = left+side, 2 = right+side,
 * 3 = mid+side), so they are presumably what that function returns --
 * confirm against the switch in the decorrelation routine. */
42 #define ALAC_CHMODE_LEFT_RIGHT 0
43 #define ALAC_CHMODE_LEFT_SIDE 1
44 #define ALAC_CHMODE_RIGHT_SIDE 2
45 #define ALAC_CHMODE_MID_SIDE 3
46 
47 typedef struct RiceContext {
52 } RiceContext;
53 
54 typedef struct AlacLPCContext {
55  int lpc_order;
57  int lpc_quant;
59 
60 typedef struct AlacEncodeContext {
76 
77 
79  const int16_t *input_samples)
80 {
81  int ch, i;
82 
83  for (ch = 0; ch < s->avctx->channels; ch++) {
84  const int16_t *sptr = input_samples + ch;
85  for (i = 0; i < s->avctx->frame_size; i++) {
86  s->sample_buf[ch][i] = *sptr;
87  sptr += s->avctx->channels;
88  }
89  }
90 }
91 
92 static void encode_scalar(AlacEncodeContext *s, int x,
93  int k, int write_sample_size)
94 {
95  int divisor, q, r;
96 
97  k = FFMIN(k, s->rc.k_modifier);
98  divisor = (1<<k) - 1;
99  q = x / divisor;
100  r = x % divisor;
101 
102  if (q > 8) {
103  // write escape code and sample value directly
105  put_bits(&s->pbctx, write_sample_size, x);
106  } else {
107  if (q)
108  put_bits(&s->pbctx, q, (1<<q) - 1);
109  put_bits(&s->pbctx, 1, 0);
110 
111  if (k != 1) {
112  if (r > 0)
113  put_bits(&s->pbctx, k, r+1);
114  else
115  put_bits(&s->pbctx, k-1, 0);
116  }
117  }
118 }
119 
120 static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
121 {
122  put_bits(&s->pbctx, 3, s->avctx->channels-1); // No. of channels -1
123  put_bits(&s->pbctx, 16, 0); // Seems to be zero
124  put_bits(&s->pbctx, 1, 1); // Sample count is in the header
125  put_bits(&s->pbctx, 2, 0); // FIXME: Wasted bytes field
126  put_bits(&s->pbctx, 1, is_verbatim); // Audio block is verbatim
127  put_bits32(&s->pbctx, s->avctx->frame_size); // No. of samples in the frame
128 }
129 
131 {
132  int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
133  int shift[MAX_LPC_ORDER];
134  int opt_order;
135 
136  if (s->compression_level == 1) {
137  s->lpc[ch].lpc_order = 6;
138  s->lpc[ch].lpc_quant = 6;
139  s->lpc[ch].lpc_coeff[0] = 160;
140  s->lpc[ch].lpc_coeff[1] = -190;
141  s->lpc[ch].lpc_coeff[2] = 170;
142  s->lpc[ch].lpc_coeff[3] = -130;
143  s->lpc[ch].lpc_coeff[4] = 80;
144  s->lpc[ch].lpc_coeff[5] = -25;
145  } else {
146  opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
147  s->avctx->frame_size,
150  ALAC_MAX_LPC_PRECISION, coefs, shift,
153 
154  s->lpc[ch].lpc_order = opt_order;
155  s->lpc[ch].lpc_quant = shift[opt_order-1];
156  memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
157  }
158 }
159 
/**
 * Choose a stereo decorrelation mode by comparing rough coding costs.
 *
 * For every sample from index 2 onward, the 2nd-order residual of each
 * channel is computed, and the absolute values of the left, right, mid
 * ((l + r) >> 1) and side (l - r) residuals are accumulated. Each mode is
 * scored as the sum of the two accumulators it would have to code.
 *
 * @param left_ch  left channel samples, at least n entries
 * @param right_ch right channel samples, at least n entries
 * @param n        number of samples per channel
 * @return index of the cheapest mode: 0 = left+right, 1 = left+side,
 *         2 = right+side, 3 = mid+side; ties go to the lowest index
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    uint64_t abs_left = 0, abs_right = 0, abs_mid = 0, abs_side = 0;
    uint64_t cost[4];
    int pos, mode, winner;

    /* accumulate absolute 2nd-order residuals for each candidate signal */
    for (pos = 2; pos < n; pos++) {
        int32_t l = left_ch[pos] - 2*left_ch[pos-1] + left_ch[pos-2];
        int32_t r = right_ch[pos] - 2*right_ch[pos-1] + right_ch[pos-2];
        int32_t mid  = (l + r) >> 1;
        int32_t side = l - r;

        abs_mid   += mid  >= 0 ? mid  : -mid;
        abs_side  += side >= 0 ? side : -side;
        abs_left  += l    >= 0 ? l    : -l;
        abs_right += r    >= 0 ? r    : -r;
    }

    /* cost of each mode is the sum of the two signals it codes */
    cost[0] = abs_left  + abs_right;
    cost[1] = abs_left  + abs_side;
    cost[2] = abs_right + abs_side;
    cost[3] = abs_mid   + abs_side;

    /* strict comparison keeps the earliest mode on ties */
    winner = 0;
    for (mode = 1; mode < 4; mode++)
        if (cost[mode] < cost[winner])
            winner = mode;

    return winner;
}
193 
195 {
196  int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
197  int i, mode, n = s->avctx->frame_size;
198  int32_t tmp;
199 
200  mode = estimate_stereo_mode(left, right, n);
201 
202  switch(mode)
203  {
205  s->interlacing_leftweight = 0;
206  s->interlacing_shift = 0;
207  break;
208 
210  for (i = 0; i < n; i++) {
211  right[i] = left[i] - right[i];
212  }
213  s->interlacing_leftweight = 1;
214  s->interlacing_shift = 0;
215  break;
216 
218  for (i = 0; i < n; i++) {
219  tmp = right[i];
220  right[i] = left[i] - right[i];
221  left[i] = tmp + (right[i] >> 31);
222  }
223  s->interlacing_leftweight = 1;
224  s->interlacing_shift = 31;
225  break;
226 
227  default:
228  for (i = 0; i < n; i++) {
229  tmp = left[i];
230  left[i] = (tmp + right[i]) >> 1;
231  right[i] = tmp - right[i];
232  }
233  s->interlacing_leftweight = 1;
234  s->interlacing_shift = 1;
235  break;
236  }
237 }
238 
240 {
241  int i;
242  AlacLPCContext lpc = s->lpc[ch];
243 
244  if (lpc.lpc_order == 31) {
245  s->predictor_buf[0] = s->sample_buf[ch][0];
246 
247  for (i = 1; i < s->avctx->frame_size; i++)
248  s->predictor_buf[i] = s->sample_buf[ch][i] - s->sample_buf[ch][i-1];
249 
250  return;
251  }
252 
253  // generalised linear predictor
254 
255  if (lpc.lpc_order > 0) {
256  int32_t *samples = s->sample_buf[ch];
257  int32_t *residual = s->predictor_buf;
258 
259  // generate warm-up samples
260  residual[0] = samples[0];
261  for (i = 1; i <= lpc.lpc_order; i++)
262  residual[i] = samples[i] - samples[i-1];
263 
264  // perform lpc on remaining samples
265  for (i = lpc.lpc_order + 1; i < s->avctx->frame_size; i++) {
266  int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
267 
268  for (j = 0; j < lpc.lpc_order; j++) {
269  sum += (samples[lpc.lpc_order-j] - samples[0]) *
270  lpc.lpc_coeff[j];
271  }
272 
273  sum >>= lpc.lpc_quant;
274  sum += samples[0];
275  residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
276  s->write_sample_size);
277  res_val = residual[i];
278 
279  if(res_val) {
280  int index = lpc.lpc_order - 1;
281  int neg = (res_val < 0);
282 
283  while(index >= 0 && (neg ? (res_val < 0):(res_val > 0))) {
284  int val = samples[0] - samples[lpc.lpc_order - index];
285  int sign = (val ? FFSIGN(val) : 0);
286 
287  if(neg)
288  sign*=-1;
289 
290  lpc.lpc_coeff[index] -= sign;
291  val *= sign;
292  res_val -= ((val >> lpc.lpc_quant) *
293  (lpc.lpc_order - index));
294  index--;
295  }
296  }
297  samples++;
298  }
299  }
300 }
301 
303 {
304  unsigned int history = s->rc.initial_history;
305  int sign_modifier = 0, i, k;
306  int32_t *samples = s->predictor_buf;
307 
308  for (i = 0; i < s->avctx->frame_size;) {
309  int x;
310 
311  k = av_log2((history >> 9) + 3);
312 
313  x = -2*(*samples)-1;
314  x ^= (x>>31);
315 
316  samples++;
317  i++;
318 
319  encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
320 
321  history += x * s->rc.history_mult
322  - ((history * s->rc.history_mult) >> 9);
323 
324  sign_modifier = 0;
325  if (x > 0xFFFF)
326  history = 0xFFFF;
327 
328  if (history < 128 && i < s->avctx->frame_size) {
329  unsigned int block_size = 0;
330 
331  k = 7 - av_log2(history) + ((history + 16) >> 6);
332 
333  while (*samples == 0 && i < s->avctx->frame_size) {
334  samples++;
335  i++;
336  block_size++;
337  }
338  encode_scalar(s, block_size, k, 16);
339 
340  sign_modifier = (block_size <= 0xFFFF);
341 
342  history = 0;
343  }
344 
345  }
346 }
347 
349 {
350  int i, j;
351  int prediction_type = 0;
352 
353  if (s->avctx->channels == 2)
355  put_bits(&s->pbctx, 8, s->interlacing_shift);
357 
358  for (i = 0; i < s->avctx->channels; i++) {
359 
360  calc_predictor_params(s, i);
361 
362  put_bits(&s->pbctx, 4, prediction_type);
363  put_bits(&s->pbctx, 4, s->lpc[i].lpc_quant);
364 
365  put_bits(&s->pbctx, 3, s->rc.rice_modifier);
366  put_bits(&s->pbctx, 5, s->lpc[i].lpc_order);
367  // predictor coeff. table
368  for (j = 0; j < s->lpc[i].lpc_order; j++) {
369  put_sbits(&s->pbctx, 16, s->lpc[i].lpc_coeff[j]);
370  }
371  }
372 
373  // apply lpc and entropy coding to audio samples
374 
375  for (i = 0; i < s->avctx->channels; i++) {
376  alac_linear_predictor(s, i);
377 
378  // TODO: determine when this will actually help. for now it's not used.
379  if (prediction_type == 15) {
380  // 2nd pass 1st order filter
381  for (j = s->avctx->frame_size - 1; j > 0; j--)
382  s->predictor_buf[j] -= s->predictor_buf[j - 1];
383  }
384 
386  }
387 }
388 
390 {
391  AlacEncodeContext *s = avctx->priv_data;
392  int ret;
393  uint8_t *alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE+1);
394 
397 
398  if (avctx->sample_fmt != AV_SAMPLE_FMT_S16) {
399  av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");
400  return -1;
401  }
402 
403  /* TODO: Correctly implement multi-channel ALAC.
404  It is similar to multi-channel AAC, in that it has a series of
405  single-channel (SCE), channel-pair (CPE), and LFE elements. */
406  if (avctx->channels > 2) {
407  av_log(avctx, AV_LOG_ERROR, "only mono or stereo input is currently supported\n");
408  return AVERROR_PATCHWELCOME;
409  }
410 
411  // Set default compression level
413  s->compression_level = 2;
414  else
415  s->compression_level = av_clip(avctx->compression_level, 0, 2);
416 
417  // Initialize default Rice parameters
418  s->rc.history_mult = 40;
419  s->rc.initial_history = 10;
420  s->rc.k_modifier = 14;
421  s->rc.rice_modifier = 4;
422 
423  s->max_coded_frame_size = 8 + (avctx->frame_size*avctx->channels*avctx->bits_per_coded_sample>>3);
424 
425  s->write_sample_size = avctx->bits_per_coded_sample + avctx->channels - 1; // FIXME: consider wasted_bytes
426 
427  AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
428  AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
429  AV_WB32(alac_extradata+12, avctx->frame_size);
430  AV_WB8 (alac_extradata+17, avctx->bits_per_coded_sample);
431  AV_WB8 (alac_extradata+21, avctx->channels);
432  AV_WB32(alac_extradata+24, s->max_coded_frame_size);
433  AV_WB32(alac_extradata+28,
434  avctx->sample_rate * avctx->channels * avctx->bits_per_coded_sample); // average bitrate
435  AV_WB32(alac_extradata+32, avctx->sample_rate);
436 
437  // Set relevant extradata fields
438  if (s->compression_level > 0) {
439  AV_WB8(alac_extradata+18, s->rc.history_mult);
440  AV_WB8(alac_extradata+19, s->rc.initial_history);
441  AV_WB8(alac_extradata+20, s->rc.k_modifier);
442  }
443 
445  if (avctx->min_prediction_order >= 0) {
446  if (avctx->min_prediction_order < MIN_LPC_ORDER ||
448  av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
449  avctx->min_prediction_order);
450  return -1;
451  }
452 
454  }
455 
457  if (avctx->max_prediction_order >= 0) {
458  if (avctx->max_prediction_order < MIN_LPC_ORDER ||
460  av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
461  avctx->max_prediction_order);
462  return -1;
463  }
464 
466  }
467 
469  av_log(avctx, AV_LOG_ERROR,
470  "invalid prediction orders: min=%d max=%d\n",
472  return -1;
473  }
474 
475  avctx->extradata = alac_extradata;
477 
478  avctx->coded_frame = avcodec_alloc_frame();
479  avctx->coded_frame->key_frame = 1;
480 
481  s->avctx = avctx;
482  ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->max_prediction_order,
484 
485  return ret;
486 }
487 
/**
 * Encode one frame of input samples into the output buffer.
 *
 * @param avctx    codec context; avctx->priv_data holds the AlacEncodeContext
 * @param frame    output buffer the bitstream is written to
 * @param buf_size size of the output buffer in bytes
 * @param data     input samples; read as interleaved int16_t in verbatim mode
 * @return number of bytes written to frame, or -1 on error
 */
488 static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
489  int buf_size, void *data)
490 {
491  AlacEncodeContext *s = avctx->priv_data;
492  PutBitContext *pb = &s->pbctx;
493  int i, out_bytes, verbatim_flag = 0;
494 
/* reject frames longer than the supported maximum */
495  if (avctx->frame_size > DEFAULT_FRAME_SIZE) {
496  av_log(avctx, AV_LOG_ERROR, "input frame size exceeded\n");
497  return -1;
498  }
499 
/* require room for twice the maximum coded frame size */
500  if (buf_size < 2 * s->max_coded_frame_size) {
501  av_log(avctx, AV_LOG_ERROR, "buffer size is too small\n");
502  return -1;
503  }
504 
/* retry entry point: the bit writer is re-initialised at the start of
 * frame, so a second (verbatim) pass overwrites the first attempt */
505 verbatim:
506  init_put_bits(pb, frame, buf_size);
507 
508  if (s->compression_level == 0 || verbatim_flag) {
509  // Verbatim mode
/* header with the verbatim flag set, then every interleaved sample as a
 * signed 16-bit value */
510  const int16_t *samples = data;
511  write_frame_header(s, 1);
512  for (i = 0; i < avctx->frame_size * avctx->channels; i++) {
513  put_sbits(pb, 16, *samples++);
514  }
515  } else {
/* NOTE(review): this listing jumps from source line 517 to 519, so the
 * statement that writes the compressed payload after the header appears to
 * be missing here -- restore it from the upstream file before building. */
516  init_sample_buffers(s, data);
517  write_frame_header(s, 0);
519  }
520 
/* three 1-bits follow the payload (presumably the frame terminator --
 * confirm), then the writer is flushed and the byte count computed */
521  put_bits(pb, 3, 7);
522  flush_put_bits(pb);
523  out_bytes = put_bits_count(pb) >> 3;
524 
525  if (out_bytes > s->max_coded_frame_size) {
526  /* frame too large. use verbatim mode */
527  if (verbatim_flag || s->compression_level == 0) {
528  /* still too large. must be an error. */
529  av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");
530  return -1;
531  }
532  verbatim_flag = 1;
533  goto verbatim;
534  }
535 
536  return out_bytes;
537 }
538 
540 {
541  AlacEncodeContext *s = avctx->priv_data;
542  ff_lpc_end(&s->lpc_ctx);
543  av_freep(&avctx->extradata);
544  avctx->extradata_size = 0;
545  av_freep(&avctx->coded_frame);
546  return 0;
547 }
548 
550  .name = "alac",
551  .type = AVMEDIA_TYPE_AUDIO,
552  .id = CODEC_ID_ALAC,
553  .priv_data_size = sizeof(AlacEncodeContext),
555  .encode = alac_encode_frame,
557  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
558  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
560  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
561 };