alac.c
Go to the documentation of this file.
1 /*
2  * ALAC (Apple Lossless Audio Codec) decoder
3  * Copyright (c) 2005 David Hammerton
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
49 #include "avcodec.h"
50 #include "internal.h"
51 #include "get_bits.h"
52 #include "bytestream.h"
53 #include "unary.h"
54 #include "mathops.h"
55 
56 #define ALAC_EXTRADATA_SIZE 36
57 #define MAX_CHANNELS 2
58 
/* Private decoder state, reached through avctx->priv_data. */
59 typedef struct {
/* NOTE(review): this listing skips some lines here (60 -> 64 -> 66). The rest
 * of the file uses alac->avctx, alac->frame, alac->gb and alac->numchannels,
 * whose declarations are not visible in this view. */
60 
64 
66 
67  /* buffers: one per channel; allocate_buffers() sizes them as
    * setinfo_max_samples_per_frame * sizeof(int32_t) bytes */
68  int32_t *predicterror_buffer[MAX_CHANNELS]; /* decoded prediction residuals */
69 
70  int32_t *outputsamples_buffer[MAX_CHANNELS]; /* reconstructed samples */
71 
72  int32_t *extra_bits_buffer[MAX_CHANNELS]; /* low-order bits read separately when extra_bits != 0 */
73 
74  /* stuff from setinfo: values read from the extradata by alac_set_info() */
75  uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */
76  uint8_t setinfo_sample_size; /* 0x10 */
77  uint8_t setinfo_rice_historymult; /* 0x28 */
78  uint8_t setinfo_rice_initialhistory; /* 0x0a */
79  uint8_t setinfo_rice_kmodifier; /* 0x0e */
80  /* end setinfo stuff */
81 
82  int extra_bits; /* per frame: 2-bit header field << 3; reset to 0 for uncompressed frames */
83 } ALACContext;
84 
85 static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){
86  /* read x - number of 1s before 0 represent the rice */
87  int x = get_unary_0_9(gb);
88 
89  if (x > 8) { /* RICE THRESHOLD */
90  /* use alternative encoding */
91  x = get_bits(gb, readsamplesize);
92  } else {
93  if (k >= limit)
94  k = limit;
95 
96  if (k != 1) {
97  int extrabits = show_bits(gb, k);
98 
99  /* multiply x by 2^k - 1, as part of their strange algorithm */
100  x = (x << k) - x;
101 
102  if (extrabits > 1) {
103  x += extrabits - 1;
104  skip_bits(gb, k);
105  } else
106  skip_bits(gb, k - 1);
107  }
108  }
109  return x;
110 }
111 
/*
 * Entropy decoder: fills output_buffer[0 .. output_size-1] with signed values
 * read from alac->gb through decode_scalar().
 *
 * NOTE(review): the line carrying the return type, the function name and the
 * leading parameter is absent from this listing. The body dereferences an
 * `alac` pointer (alac->gb, alac->avctx) that is presumably declared on that
 * missing line -- confirm against the full file.
 *
 * output_buffer        destination for the decoded values
 * output_size          number of values to produce
 * readsamplesize       bit width handed to decode_scalar() for escape-coded values
 * rice_initialhistory  starting value of the adaptive `history` state
 * rice_kmodifier       passed to decode_scalar() as the cap on k
 * rice_historymult     weight used when updating `history`
 * rice_kmodifier_mask  not referenced anywhere in the visible body
 */
113  int32_t *output_buffer,
114  int output_size,
115  int readsamplesize, /* arg_10 */
116  int rice_initialhistory, /* arg424->b */
117  int rice_kmodifier, /* arg424->d */
118  int rice_historymult, /* arg424->c */
119  int rice_kmodifier_mask /* arg424->e */
120  )
121 {
122  int output_count;
123  unsigned int history = rice_initialhistory;
124  int sign_modifier = 0; /* 1 for the sample right after a zero run, otherwise 0 */
125 
126  for (output_count = 0; output_count < output_size; output_count++) {
127  int32_t x;
128  int32_t x_modified;
129  int32_t final_val;
130 
131  /* standard rice encoding */
132  int k; /* size of extra bits */
133 
134  /* k follows the running history: log2((history >> 9) + 3) */
135  k = av_log2((history >> 9) + 3);
136  x= decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize);
137 
   /* map the unsigned code to a signed value:
    * even x_modified -> x_modified / 2, odd x_modified -> -(x_modified + 1) / 2 */
138  x_modified = sign_modifier + x;
139  final_val = (x_modified + 1) / 2;
140  if (x_modified & 1) final_val *= -1;
141 
142  output_buffer[output_count] = final_val;
143 
144  sign_modifier = 0;
145 
146  /* now update the history */
147  history += x_modified * rice_historymult
148  - ((history * rice_historymult) >> 9);
149 
   /* very large codes force the history to 0xffff */
150  if (x_modified > 0xffff)
151  history = 0xffff;
152 
153  /* special case: there may be compressed blocks of 0 */
   /* only attempted when the history is small and this is not the last sample */
154  if ((history < 128) && (output_count+1 < output_size)) {
155  int k;
156  unsigned int block_size;
157 
158  sign_modifier = 1;
159 
160  k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */);
161 
   /* run length of zeros; escape-coded values use 16 bits here */
162  block_size= decode_scalar(&alac->gb, k, rice_kmodifier, 16);
163 
164  if (block_size > 0) {
   /* a run reaching past the end of the buffer is logged and clamped */
165  if(block_size >= output_size - output_count){
166  av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count);
167  block_size= output_size - output_count - 1;
168  }
   /* zero the run (4 bytes per int32_t entry) and step over it */
169  memset(&output_buffer[output_count+1], 0, block_size * 4);
170  output_count += block_size;
171  }
172 
   /* after a run longer than 0xffff the next sample gets no sign modifier */
173  if (block_size > 0xffff)
174  sign_modifier = 0;
175 
176  history = 0;
177  }
178  }
179 }
180 
/**
 * Return the sign of v as an integer.
 *
 * @param v value to classify
 * @return 1 when v is positive, -1 when v is negative, 0 when v is zero
 */
static inline int sign_only(int v)
{
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (v > 0) - (v < 0);
}
185 
/**
 * Rebuild samples from prediction residuals using an adaptive FIR predictor.
 *
 * The first output sample is always a copy of the first residual. What
 * happens to the remaining samples depends on predictor_coef_num:
 *  - 0:    residuals are copied through unchanged;
 *  - 0x1f: each sample is the previous sample plus its residual;
 *  - else: the first predictor_coef_num samples after the initial one are
 *          warm-up samples (previous sample plus residual), and every later
 *          sample is predicted from the preceding predictor_coef_num samples,
 *          after which the coefficients are nudged according to the residual.
 *
 * Every reconstructed value goes through sign_extend(value, readsamplesize);
 * sign_extend comes from a project header, presumably wrapping the value to
 * that bit width -- confirm there.
 *
 * @param error_buffer             residuals, output_size entries
 * @param buffer_out               destination, output_size entries
 * @param output_size              number of samples to reconstruct
 * @param readsamplesize           bit width passed to sign_extend()
 * @param predictor_coef_table     coefficients; modified in place by the
 *                                 adaptation step (not touched in the 0 and
 *                                 0x1f modes)
 * @param predictor_coef_num       number of coefficients, or 0 / 0x1f for the
 *                                 special modes above
 * @param predictor_quantitization right shift applied to the prediction sum
 */
static void predictor_decompress_fir_adapt(int32_t *error_buffer,
                                           int32_t *buffer_out,
                                           int output_size,
                                           int readsamplesize,
                                           int16_t *predictor_coef_table,
                                           int predictor_coef_num,
                                           int predictor_quantitization)
{
    int i;
    int rounding;

    /* first sample always copies */
    *buffer_out = *error_buffer;

    /* nothing further to reconstruct; in particular no warm-up sample may be
     * written, since the buffers only hold output_size entries */
    if (output_size <= 1)
        return;

    if (!predictor_coef_num) {
        memcpy(buffer_out + 1, error_buffer + 1,
               (output_size - 1) * sizeof(*buffer_out));
        return;
    }

    if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */
        /* error describes a small difference from the previous sample only */
        for (i = 1; i < output_size; i++)
            buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                        readsamplesize);
        return;
    }

    /* a negative coefficient count selects no prediction at all */
    if (predictor_coef_num < 0)
        return;

    /* warm-up samples, bounded by output_size so that a frame shorter than
     * the filter order never writes past the end of buffer_out */
    for (i = 1; i <= predictor_coef_num && i < output_size; i++)
        buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                    readsamplesize);

    /* rounding term for the quantization shift; a shift of 0 needs none, and
     * 1 << -1 would be undefined */
    rounding = predictor_quantitization > 0
             ? 1 << (predictor_quantitization - 1)
             : 0;

    /* general case: buffer_out slides forward by one sample per iteration, so
     * buffer_out[0] is the oldest sample of the current window and
     * buffer_out[predictor_coef_num + 1] is the sample being produced */
    for (i = predictor_coef_num + 1; i < output_size; i++) {
        int j;
        int sum = 0;
        int outval;
        int error_val = error_buffer[i];

        for (j = 0; j < predictor_coef_num; j++) {
            sum += (buffer_out[predictor_coef_num - j] - buffer_out[0]) *
                   predictor_coef_table[j];
        }

        outval = (rounding + sum) >> predictor_quantitization;
        outval = outval + buffer_out[0] + error_val;
        outval = sign_extend(outval, readsamplesize);

        buffer_out[predictor_coef_num + 1] = outval;

        /* adapt the coefficients, starting from the last one, until the
         * residual has been used up */
        if (error_val > 0) {
            int predictor_num = predictor_coef_num - 1;

            while (predictor_num >= 0 && error_val > 0) {
                int val  = buffer_out[0] -
                           buffer_out[predictor_coef_num - predictor_num];
                int sign = sign_only(val);

                predictor_coef_table[predictor_num] -= sign;

                val *= sign; /* absolute value */

                error_val -= ((val >> predictor_quantitization) *
                              (predictor_coef_num - predictor_num));

                predictor_num--;
            }
        } else if (error_val < 0) {
            int predictor_num = predictor_coef_num - 1;

            while (predictor_num >= 0 && error_val < 0) {
                int val  = buffer_out[0] -
                           buffer_out[predictor_coef_num - predictor_num];
                int sign = -sign_only(val);

                predictor_coef_table[predictor_num] -= sign;

                val *= sign; /* neg value */

                error_val -= ((val >> predictor_quantitization) *
                              (predictor_coef_num - predictor_num));

                predictor_num--;
            }
        }

        buffer_out++;
    }
}
297 
298 static void decorrelate_stereo(int32_t *buffer[MAX_CHANNELS],
299  int numsamples, uint8_t interlacing_shift,
300  uint8_t interlacing_leftweight)
301 {
302  int i;
303 
304  for (i = 0; i < numsamples; i++) {
305  int32_t a, b;
306 
307  a = buffer[0][i];
308  b = buffer[1][i];
309 
310  a -= (b * interlacing_leftweight) >> interlacing_shift;
311  b += a;
312 
313  buffer[0][i] = b;
314  buffer[1][i] = a;
315  }
316 }
317 
318 static void append_extra_bits(int32_t *buffer[MAX_CHANNELS],
319  int32_t *extra_bits_buffer[MAX_CHANNELS],
320  int extra_bits, int numchannels, int numsamples)
321 {
322  int i, ch;
323 
324  for (ch = 0; ch < numchannels; ch++)
325  for (i = 0; i < numsamples; i++)
326  buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
327 }
328 
/*
 * Interleave two channels into 16-bit output: for each sample index i,
 * buffer[0][i] and then buffer[1][i] are stored to consecutive int16_t slots
 * of buffer_out, which therefore receives 2 * numsamples values.
 *
 * NOTE(review): the line with the return type, the function name and the
 * `buffer` parameter is absent from this listing.
 */
330  int16_t *buffer_out, int numsamples)
331 {
332  int i;
333 
334  for (i = 0; i < numsamples; i++) {
   /* the assignment narrows the stored value to int16_t */
335  *buffer_out++ = buffer[0][i];
336  *buffer_out++ = buffer[1][i];
337  }
338 }
339 
/*
 * Interleave two channels into 32-bit output: for each sample index i,
 * buffer[0][i] << 8 and then buffer[1][i] << 8 are stored to consecutive
 * int32_t slots of buffer_out, which therefore receives 2 * numsamples values.
 *
 * NOTE(review): the line with the return type, the function name and the
 * `buffer` parameter is absent from this listing.
 */
341  int32_t *buffer_out, int numsamples)
342 {
343  int i;
344 
345  for (i = 0; i < numsamples; i++) {
   /* shift left by 8 so the sample occupies the upper bits of the 32-bit slot */
346  *buffer_out++ = buffer[0][i] << 8;
347  *buffer_out++ = buffer[1][i] << 8;
348  }
349 }
350 
/*
 * Decode one packet.
 *
 * Parses the frame header from avpkt, decodes every channel into
 * alac->outputsamples_buffer[], and writes the result to alac->frame.data[0]
 * as 16-bit or 32-bit samples depending on setinfo_sample_size.
 *
 * avctx          codec context; avctx->priv_data is the ALACContext
 * data           receives a copy of alac->frame (written as an AVFrame)
 * got_frame_ptr  set to 1 when the function reaches its end
 * avpkt          input packet (data and size are read)
 *
 * Returns avpkt->size on success. Returns AVERROR_INVALIDDATA, -1, or the
 * negative value from ff_get_buffer() on the error paths below.
 *
 * NOTE(review): this listing is missing the first line of several calls (the
 * callee name and leading arguments): ahead of listing lines 448, 466, 474,
 * 502, 509 and 520. The orphaned argument lines are kept untouched.
 */
351 static int alac_decode_frame(AVCodecContext *avctx, void *data,
352  int *got_frame_ptr, AVPacket *avpkt)
353 {
354  const uint8_t *inbuffer = avpkt->data;
355  int input_buffer_size = avpkt->size;
356  ALACContext *alac = avctx->priv_data;
357 
358  int channels;
359  unsigned int outputsamples;
360  int hassize;
361  unsigned int readsamplesize;
362  int isnotcompressed;
363  uint8_t interlacing_shift;
364  uint8_t interlacing_leftweight;
365  int i, ch, ret;
366 
367  init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);
368 
   /* 3-bit field holds the channel count minus one; it must match the context */
369  channels = get_bits(&alac->gb, 3) + 1;
370  if (channels != avctx->channels) {
371  av_log(avctx, AV_LOG_ERROR, "frame header channel count mismatch\n");
372  return AVERROR_INVALIDDATA;
373  }
374 
375  /* 2^result = something to do with output waiting.
376  * perhaps matters if we read > 1 frame in a pass?
377  */
378  skip_bits(&alac->gb, 4);
379 
380  skip_bits(&alac->gb, 12); /* unknown, skip 12 bits */
381 
382  /* the output sample size is stored soon */
383  hassize = get_bits1(&alac->gb);
384 
   /* 2-bit field << 3, i.e. 0, 8, 16 or 24 */
385  alac->extra_bits = get_bits(&alac->gb, 2) << 3;
386 
387  /* whether the frame is compressed */
388  isnotcompressed = get_bits1(&alac->gb);
389 
390  if (hassize) {
391  /* now read the number of samples as a 32bit integer */
392  outputsamples = get_bits_long(&alac->gb, 32);
   /* the work buffers are sized for setinfo_max_samples_per_frame samples */
393  if(outputsamples > alac->setinfo_max_samples_per_frame){
394  av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n", outputsamples, alac->setinfo_max_samples_per_frame);
395  return -1;
396  }
397  } else
398  outputsamples = alac->setinfo_max_samples_per_frame;
399 
400  /* get output buffer */
401  if (outputsamples > INT32_MAX) {
402  av_log(avctx, AV_LOG_ERROR, "unsupported block size: %u\n", outputsamples);
403  return AVERROR_INVALIDDATA;
404  }
405  alac->frame.nb_samples = outputsamples;
406  if ((ret = ff_get_buffer(avctx, &alac->frame)) < 0) {
407  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
408  return ret;
409  }
410 
   /* bits per coded value: sample size minus the separately stored extra
    * bits, plus (channels - 1). The arithmetic is unsigned, so a result below
    * zero wraps to a large value and is rejected by the check that follows. */
411  readsamplesize = alac->setinfo_sample_size - alac->extra_bits + channels - 1;
412  if (readsamplesize > MIN_CACHE_BITS) {
413  av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize);
414  return -1;
415  }
416 
417  if (!isnotcompressed) {
418  /* so it is compressed */
419  int16_t predictor_coef_table[MAX_CHANNELS][32];
420  int predictor_coef_num[MAX_CHANNELS];
421  int prediction_type[MAX_CHANNELS];
422  int prediction_quantitization[MAX_CHANNELS];
423  int ricemodifier[MAX_CHANNELS];
424 
   /* NOTE(review): interlacing_shift is an unchecked 8-bit value that
    * decorrelate_stereo() later uses as a shift count -- confirm that values
    * of 32 or more cannot reach it. */
425  interlacing_shift = get_bits(&alac->gb, 8);
426  interlacing_leftweight = get_bits(&alac->gb, 8);
427 
   /* per-channel predictor parameters, then the coefficients themselves */
428  for (ch = 0; ch < channels; ch++) {
429  prediction_type[ch] = get_bits(&alac->gb, 4);
430  prediction_quantitization[ch] = get_bits(&alac->gb, 4);
431 
432  ricemodifier[ch] = get_bits(&alac->gb, 3);
433  predictor_coef_num[ch] = get_bits(&alac->gb, 5);
434 
435  /* read the predictor table */
436  for (i = 0; i < predictor_coef_num[ch]; i++)
437  predictor_coef_table[ch][i] = (int16_t)get_bits(&alac->gb, 16);
438  }
439 
   /* the extra low-order bits are stored sample by sample, channels interleaved */
440  if (alac->extra_bits) {
441  for (i = 0; i < outputsamples; i++) {
442  for (ch = 0; ch < channels; ch++)
443  alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
444  }
445  }
446  for (ch = 0; ch < channels; ch++) {
   /* NOTE(review): call head missing from the listing (lines 447, 451, 452) */
448  alac->predicterror_buffer[ch],
449  outputsamples,
450  readsamplesize,
453  ricemodifier[ch] * alac->setinfo_rice_historymult / 4,
454  (1 << alac->setinfo_rice_kmodifier) - 1);
455 
456  /* adaptive FIR filter */
457  if (prediction_type[ch] == 15) {
458  /* Prediction type 15 runs the adaptive FIR twice.
459  * The first pass uses the special-case coef_num = 31, while
460  * the second pass uses the coefs from the bitstream.
461  *
462  * However, this prediction type is not currently used by the
463  * reference encoder.
464  */
   /* NOTE(review): call head missing from the listing (line 465) */
466  alac->predicterror_buffer[ch],
467  outputsamples, readsamplesize,
468  NULL, 31, 0);
469  } else if (prediction_type[ch] > 0) {
   /* other non-zero types are only warned about; decoding continues */
470  av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
471  prediction_type[ch]);
472  }
   /* NOTE(review): call head missing from the listing (line 473) */
474  alac->outputsamples_buffer[ch],
475  outputsamples, readsamplesize,
476  predictor_coef_table[ch],
477  predictor_coef_num[ch],
478  prediction_quantitization[ch]);
479  }
480  } else {
481  /* not compressed, easy case */
   /* samples are stored raw, channels interleaved, setinfo_sample_size bits each */
482  for (i = 0; i < outputsamples; i++) {
483  for (ch = 0; ch < channels; ch++) {
484  alac->outputsamples_buffer[ch][i] = get_sbits_long(&alac->gb,
485  alac->setinfo_sample_size);
486  }
487  }
   /* clearing these disables the extra-bits and decorrelation steps below */
488  alac->extra_bits = 0;
489  interlacing_shift = 0;
490  interlacing_leftweight = 0;
491  }
   /* a 3-bit end marker of 7 is expected; a mismatch is logged but not fatal */
492  if (get_bits(&alac->gb, 3) != 7)
493  av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n");
494 
   /* stereo decorrelation is only undone when a non-zero weight was signalled */
495  if (channels == 2 && interlacing_leftweight) {
496  decorrelate_stereo(alac->outputsamples_buffer, outputsamples,
497  interlacing_shift, interlacing_leftweight);
498  }
499 
500  if (alac->extra_bits) {
   /* NOTE(review): call head missing from the listing (line 501) */
502  alac->extra_bits, alac->numchannels, outputsamples);
503  }
504 
   /* write the output; there is no default case, so any other sample size
    * leaves frame.data[0] unwritten here */
505  switch(alac->setinfo_sample_size) {
506  case 16:
507  if (channels == 2) {
   /* NOTE(review): call head missing from the listing (line 508) */
509  (int16_t *)alac->frame.data[0], outputsamples);
510  } else {
511  int16_t *outbuffer = (int16_t *)alac->frame.data[0];
512  for (i = 0; i < outputsamples; i++) {
513  outbuffer[i] = alac->outputsamples_buffer[0][i];
514  }
515  }
516  break;
517  case 24:
518  if (channels == 2) {
   /* NOTE(review): call head missing from the listing (line 519) */
520  (int32_t *)alac->frame.data[0], outputsamples);
521  } else {
522  int32_t *outbuffer = (int32_t *)alac->frame.data[0];
523  for (i = 0; i < outputsamples; i++)
524  outbuffer[i] = alac->outputsamples_buffer[0][i] << 8;
525  }
526  break;
527  }
528 
   /* more than 8 unread bits in the packet is reported but not treated as fatal */
529  if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8)
530  av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb));
531 
532  *got_frame_ptr = 1;
533  *(AVFrame *)data = alac->frame;
534 
535  return input_buffer_size;
536 }
537 
/*
 * Release the per-channel work buffers held in the ALACContext found at
 * avctx->priv_data. Always returns 0.
 *
 * NOTE(review): the line with the return type, name and parameter list is
 * absent from this listing. allocate_buffers() calls this code as
 * alac_decode_close(alac->avctx).
 */
539 {
540  ALACContext *alac = avctx->priv_data;
541 
542  int ch;
   /* only the first numchannels entries of each buffer array are released */
543  for (ch = 0; ch < alac->numchannels; ch++) {
544  av_freep(&alac->predicterror_buffer[ch]);
545  av_freep(&alac->outputsamples_buffer[ch]);
546  av_freep(&alac->extra_bits_buffer[ch]);
547  }
548 
549  return 0;
550 }
551 
552 static int allocate_buffers(ALACContext *alac)
553 {
554  int ch;
555  for (ch = 0; ch < alac->numchannels; ch++) {
556  int buf_size = alac->setinfo_max_samples_per_frame * sizeof(int32_t);
557 
558  FF_ALLOC_OR_GOTO(alac->avctx, alac->predicterror_buffer[ch],
559  buf_size, buf_alloc_fail);
560 
562  buf_size, buf_alloc_fail);
563 
564  FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
565  buf_size, buf_alloc_fail);
566  }
567  return 0;
568 buf_alloc_fail:
569  alac_decode_close(alac->avctx);
570  return AVERROR(ENOMEM);
571 }
572 
573 static int alac_set_info(ALACContext *alac)
574 {
575  const unsigned char *ptr = alac->avctx->extradata;
576 
577  ptr += 4; /* size */
578  ptr += 4; /* alac */
579  ptr += 4; /* version */
580 
581  if(AV_RB32(ptr) >= UINT_MAX/4){
582  av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n");
583  return -1;
584  }
585 
586  /* buffer size / 2 ? */
587  alac->setinfo_max_samples_per_frame = bytestream_get_be32(&ptr);
588  if (!alac->setinfo_max_samples_per_frame ||
589  alac->setinfo_max_samples_per_frame > INT_MAX / sizeof(int32_t)) {
590  av_log(alac->avctx, AV_LOG_ERROR, "max samples per frame invalid: %u\n",
592  return AVERROR_INVALIDDATA;
593  }
594  ptr++; /* compatible version */
595  alac->setinfo_sample_size = *ptr++;
596  alac->setinfo_rice_historymult = *ptr++;
597  alac->setinfo_rice_initialhistory = *ptr++;
598  alac->setinfo_rice_kmodifier = *ptr++;
599  alac->numchannels = *ptr++;
600  bytestream_get_be16(&ptr); /* maxRun */
601  bytestream_get_be32(&ptr); /* max coded frame size */
602  bytestream_get_be32(&ptr); /* average bitrate */
603  bytestream_get_be32(&ptr); /* samplerate */
604 
605  return 0;
606 }
607 
/*
 * Decoder initialisation: reads the stream parameters from the extradata,
 * selects the output sample format, settles the channel count and allocates
 * the work buffers.
 *
 * Returns 0 on success. Returns AVERROR_INVALIDDATA, -1, AVERROR_PATCHWELCOME
 * or the negative value from allocate_buffers() on the error paths below.
 *
 * NOTE(review): the line with the return type, name and parameter list is
 * absent from this listing, as are listing lines 615 and 654.
 */
609 {
610  int ret;
611  ALACContext *alac = avctx->priv_data;
612  alac->avctx = avctx;
613 
614  /* initialize from the extradata */
   /* NOTE(review): the condition guarding this error path (line 615) is
    * missing; presumably it compares the extradata size against
    * ALAC_EXTRADATA_SIZE -- confirm. */
616  av_log(avctx, AV_LOG_ERROR, "alac: extradata is too small\n");
617  return AVERROR_INVALIDDATA;
618  }
   /* any non-zero result from alac_set_info() is treated as failure */
619  if (alac_set_info(alac)) {
620  av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
621  return -1;
622  }
623 
   /* only 16-bit and 24-bit streams are accepted; 24-bit uses the S32 format */
624  switch (alac->setinfo_sample_size) {
625  case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16;
626  break;
627  case 24: avctx->sample_fmt = AV_SAMPLE_FMT_S32;
628  break;
629  default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
630  alac->setinfo_sample_size);
631  return AVERROR_PATCHWELCOME;
632  }
633 
   /* an extradata channel count within 1..MAX_CHANNELS overrides
    * avctx->channels; otherwise the value already in avctx->channels is used */
634  if (alac->numchannels < 1) {
635  av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
636  alac->numchannels = avctx->channels;
637  } else {
638  if (alac->numchannels > MAX_CHANNELS)
639  alac->numchannels = avctx->channels;
640  else
641  avctx->channels = alac->numchannels;
642  }
643  if (avctx->channels > MAX_CHANNELS) {
644  av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
645  avctx->channels);
646  return AVERROR_PATCHWELCOME;
647  }
648 
649  if ((ret = allocate_buffers(alac)) < 0) {
650  av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
651  return ret;
652  }
653 
   /* NOTE(review): listing line 654 is missing here */
655  avctx->coded_frame = &alac->frame;
656 
657  return 0;
658 }
659 
/*
 * Codec registration entry for the ALAC decoder.
 *
 * NOTE(review): the opening line of this initializer (type and variable
 * name) and listing lines 665-667 are absent from this view; those lines
 * presumably hold the init/close/decode callbacks -- confirm against the
 * full file.
 */
661  .name = "alac",
662  .type = AVMEDIA_TYPE_AUDIO,
663  .id = CODEC_ID_ALAC,
664  .priv_data_size = sizeof(ALACContext), /* size of the private context */
668  .capabilities = CODEC_CAP_DR1,
669  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
670 };