Libav
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #define UNCHECKED_BITSTREAM_READER 1
29 
30 #include <math.h>
31 
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/mem.h"
35 #include "avcodec.h"
36 #include "internal.h"
37 #include "get_bits.h"
38 #include "put_bits.h"
39 #include "wmavoice_data.h"
40 #include "celp_filters.h"
41 #include "acelp_vectors.h"
42 #include "acelp_filters.h"
43 #include "lsp.h"
44 #include "dct.h"
45 #include "rdft.h"
46 #include "sinewin.h"
47 
/* Compile-time limits used to size the decoder's tables and work buffers. */
/* NOTE(review): presumably the largest number of blocks in one frame (the
 * frame_descs rows go up to 8 in their first column) - confirm. */
48 #define MAX_BLOCKS 8
/* Element count of the per-LSP arrays (prev_lsps[], synth_history[]); the
 * decoder init selects either 10 or 16 LSPs. */
49 #define MAX_LSPS 16
/* Number of floats reserved in front of the postfilter output inside
 * synth_filter_out_buf[] (postfilter() starts writing at this index). */
50 #define MAX_LSPS_ALIGN16 16
/* Multiplier applied to MAX_FRAMESIZE to obtain MAX_SFRAMESIZE. */
51 #define MAX_FRAMES 3
/* Upper bound on block sizes (checked by assert() in the synthesis code) and
 * element count of denoise_filter_cache[]. */
53 #define MAX_FRAMESIZE 160
/* Element count of excitation_history[]. */
54 #define MAX_SIGNAL_HISTORY 416
/* MAX_FRAMESIZE * MAX_FRAMES; used in the size of zero_exc_pf[]. */
55 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* NOTE(review): not referenced in the visible part of the file; presumably
 * the size of a superframe cache - confirm. */
56 #define SFRAME_CACHE_MAXSIZE 256
/* Bit-width argument passed to INIT_VLC_STATIC() for frame_type_vlc. */
58 #define VLC_NBITS 6
60 
61 
65 
69 enum {
72  ACB_TYPE_HAMMING = 2
77 };
80 
84 enum {
86  FCB_TYPE_HARDCODED = 1,
89  FCB_TYPE_AW_PULSES = 2,
93 };
96 
100 static const struct frame_type_desc {
107  uint16_t frame_size;
110 } frame_descs[17] = {
112  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
113  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
114  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
115  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
116  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
117  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
118  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
119  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
120  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
121  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
122  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
123  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
124  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
125  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
126  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
127  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
128  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
129 };
130 
134 typedef struct {
140  int8_t vbm_tree[25];
144 
146  int history_nsamples;
149 
151  /* postfilter specific values */
152  int do_apf;
153  int denoise_strength;
155  int denoise_tilt_corr;
157  int dc_level;
159 
161  int lsps;
164  int frame_lsp_bitsize;
166  int sframe_lsp_bitsize;
168 
173  int block_pitch_nbits;
175  int block_pitch_range;
178  int block_delta_pitch_hrange;
182  uint16_t block_conv_table[4];
184 
196  int has_residual_lsps;
200  int skip_bits_next;
205 
212  PutBitContext pb;
217 
227  double prev_lsps[MAX_LSPS];
228  int last_pitch_val;
232  float silence_gain;
234 
236  int aw_pulse_range;
238  int aw_n_pulses[2];
244  int aw_first_pulse_off[2];
247  int aw_next_pulse_off_cache;
249 
255  float gain_pred_err[6];
257  float excitation_history[MAX_SIGNAL_HISTORY];
261  float synth_history[MAX_LSPS];
262 
271  RDFTContext rdft, irdft;
272  DCTContext dct, dst;
274  float sin[511], cos[511];
276  float postfilter_agc;
278  float dcf_mem[2];
280  float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
283  float denoise_filter_cache[MAX_FRAMESIZE];
285  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
287  DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
289  DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
292 
296 
306 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
307 {
308  int cntr[8] = { 0 }, n, res;
309 
310  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
311  for (n = 0; n < 17; n++) {
312  res = get_bits(gb, 3);
313  if (cntr[res] > 3) // should be >= 3 + (res == 7))
314  return -1;
315  vbm_tree[res * 3 + cntr[res]++] = n;
316  }
317  return 0;
318 }
319 
321 {
322  static const uint8_t bits[] = {
323  2, 2, 2, 4, 4, 4,
324  6, 6, 6, 8, 8, 8,
325  10, 10, 10, 12, 12, 12,
326  14, 14, 14, 14
327  };
328  static const uint16_t codes[] = {
329  0x0000, 0x0001, 0x0002, // 00/01/10
330  0x000c, 0x000d, 0x000e, // 11+00/01/10
331  0x003c, 0x003d, 0x003e, // 1111+00/01/10
332  0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10
333  0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10
334  0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10
335  0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
336  };
337 
338  INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
339  bits, 1, 1, codes, 2, 2, 132);
340 }
341 
346 {
347  int n, flags, pitch_range, lsp16_flag;
348  WMAVoiceContext *s = ctx->priv_data;
349 
358  if (ctx->extradata_size != 46) {
359  av_log(ctx, AV_LOG_ERROR,
360  "Invalid extradata size %d (should be 46)\n",
361  ctx->extradata_size);
362  return AVERROR_INVALIDDATA;
363  }
364  flags = AV_RL32(ctx->extradata + 18);
365  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
366  s->do_apf = flags & 0x1;
367  if (s->do_apf) {
368  ff_rdft_init(&s->rdft, 7, DFT_R2C);
369  ff_rdft_init(&s->irdft, 7, IDFT_C2R);
370  ff_dct_init(&s->dct, 6, DCT_I);
371  ff_dct_init(&s->dst, 6, DST_I);
372 
373  ff_sine_window_init(s->cos, 256);
374  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
375  for (n = 0; n < 255; n++) {
376  s->sin[n] = -s->sin[510 - n];
377  s->cos[510 - n] = s->cos[n];
378  }
379  }
380  s->denoise_strength = (flags >> 2) & 0xF;
381  if (s->denoise_strength >= 12) {
382  av_log(ctx, AV_LOG_ERROR,
383  "Invalid denoise filter strength %d (max=11)\n",
384  s->denoise_strength);
385  return AVERROR_INVALIDDATA;
386  }
387  s->denoise_tilt_corr = !!(flags & 0x40);
388  s->dc_level = (flags >> 7) & 0xF;
389  s->lsp_q_mode = !!(flags & 0x2000);
390  s->lsp_def_mode = !!(flags & 0x4000);
391  lsp16_flag = flags & 0x1000;
392  if (lsp16_flag) {
393  s->lsps = 16;
394  s->frame_lsp_bitsize = 34;
395  s->sframe_lsp_bitsize = 60;
396  } else {
397  s->lsps = 10;
398  s->frame_lsp_bitsize = 24;
399  s->sframe_lsp_bitsize = 48;
400  }
401  for (n = 0; n < s->lsps; n++)
402  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
403 
404  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
405  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
406  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
407  return AVERROR_INVALIDDATA;
408  }
409 
410  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
411  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
412  pitch_range = s->max_pitch_val - s->min_pitch_val;
413  if (pitch_range <= 0) {
414  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
415  return AVERROR_INVALIDDATA;
416  }
417  s->pitch_nbits = av_ceil_log2(pitch_range);
418  s->last_pitch_val = 40;
420  s->history_nsamples = s->max_pitch_val + 8;
421 
423  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
424  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
425 
426  av_log(ctx, AV_LOG_ERROR,
427  "Unsupported samplerate %d (min=%d, max=%d)\n",
428  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
429 
430  return AVERROR(ENOSYS);
431  }
432 
433  s->block_conv_table[0] = s->min_pitch_val;
434  s->block_conv_table[1] = (pitch_range * 25) >> 6;
435  s->block_conv_table[2] = (pitch_range * 44) >> 6;
436  s->block_conv_table[3] = s->max_pitch_val - 1;
437  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
438  if (s->block_delta_pitch_hrange <= 0) {
439  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
440  return AVERROR_INVALIDDATA;
441  }
442  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
444  s->block_conv_table[3] + 1 +
445  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
446  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
447 
448  ctx->channels = 1;
451 
452  return 0;
453 }
454 
/**
 * Adaptive gain control: scale the postfilter output so that its level
 * follows the level of the unfiltered synthesized speech.
 *
 * The per-sample gain is a first-order recursive average
 * (mem = alpha * mem + gain_scale_factor) whose state is carried across
 * calls through *gain_mem.
 *
 * @param out          output samples (size entries)
 * @param in           postfiltered input samples (size entries)
 * @param speech_synth reference speech samples (size entries)
 * @param size         number of samples to process
 * @param alpha        smoothing factor of the recursive gain average
 * @param gain_mem     in/out: gain state kept between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    /* "energy" here is the sum of absolute sample values */
    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* An all-zero postfilter block would otherwise divide by zero and store
     * inf/NaN in *gain_mem, corrupting every following block. With a zero
     * input the output is zero regardless of the gain, so contribute no
     * new gain and just let the memory decay. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
497 
516 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
517  const float *in, float *out, int size)
518 {
519  int n;
520  float optimal_gain = 0, dot;
521  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
522  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
523  *best_hist_ptr;
524 
525  /* find best fitting point in history */
526  do {
527  dot = avpriv_scalarproduct_float_c(in, ptr, size);
528  if (dot > optimal_gain) {
529  optimal_gain = dot;
530  best_hist_ptr = ptr;
531  }
532  } while (--ptr >= end);
533 
534  if (optimal_gain <= 0)
535  return -1;
536  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
537  if (dot <= 0) // would be 1.0
538  return -1;
539 
540  if (optimal_gain <= dot) {
541  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
542  } else
543  dot = 0.625;
544 
545  /* actual smoothing */
546  for (n = 0; n < size; n++)
547  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
548 
549  return 0;
550 }
551 
/**
 * Compute the tilt factor of a set of LPCs: the ratio of two scalar
 * products (as computed by avpriv_scalarproduct_float_c) taken over the
 * coefficient vector.
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return (lpcs[0] + <lpcs, lpcs shifted by one>) / (1 + <lpcs, lpcs>)
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0 +
                       avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    const float lag1 = lpcs[0] +
                       avpriv_scalarproduct_float_c(lpcs, lpcs + 1,
                                                    n_lpcs - 1);

    return lag1 / lag0;
}
571 
/**
 * Derive the denoise filter coefficients for one block from its LPCs.
 *
 * Steps, all visible below: forward RDFT of lpcs (in place), conversion of
 * the spectrum to log10 power, mapping of that power through lookup tables
 * into per-frequency gains (coeffs[0..64]) and scaled values (lpcs[0..64]),
 * DCT-I and DST-I of lpcs, combination of both into interleaved pairs using
 * the s->sin[] / s->cos[] tables, inverse RDFT of coeffs, truncation to
 * `remainder` samples, optional tilt compensation, and normalization.
 *
 * @param s         decoder context; rdft, irdft, dct, dst, sin, cos and
 *                  denoise_tilt_corr are used
 * @param lpcs      in/out work buffer, overwritten; indexed up to [127]
 * @param fcb_type  selects the gain multiplier (5/13 for
 *                  FCB_TYPE_HARDCODED, 5/14.7 otherwise)
 * @param coeffs    output buffer; written up to index 127
 * @param remainder number of leading coefficients that are kept; the rest
 *                  of the 128 entries is zeroed
 */
575 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
576  int fcb_type, float *coeffs, int remainder)
577 {
578  float last_coeff, min = 15.0, max = -15.0;
579  float irange, angle_mul, gain_mul, range, sq;
580  int n, idx;
581 
582  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
583  s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range(var, assign): store log10f(assign) in var and widen the running
 * [min, max] interval to include it. */
584 #define log_range(var, assign) do { \
585  float tmp = log10f(assign); var = tmp; \
586  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
587  } while (0)
588  log_range(last_coeff, lpcs[1] * lpcs[1]);
589  for (n = 1; n < 64; n++)
590  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
591  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
592  log_range(lpcs[0], lpcs[0] * lpcs[0]);
593 #undef log_range
594  range = max - min;
  /* lpcs[1] was consumed first (into last_coeff) because slot 1 is
   * overwritten by the loop above; its log power ends up in slot 64. */
595  lpcs[64] = last_coeff;
596 
597  /* Now, use this spectrum to pick out these frequencies with higher
598  * (relative) power/energy (which we then take to be "not noise"),
599  * and set up a table (still in lpc[]) of (relative) gains per frequency.
600  * These frequencies will be maintained, while others ("noise") will be
601  * decreased in the filter output. */
602  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
603  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
604  (5.0 / 14.7));
605  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
606  for (n = 0; n <= 64; n++) {
607  float pwr;
608 
609  idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
  /* NOTE(review): `pwr` is read below but no assignment to it is visible in
   * this listing (original line 610 is absent) - confirm against the
   * upstream file before relying on this block. */
611  lpcs[n] = angle_mul * pwr;
612 
613  /* 70.57 =~ 1/log10(1.0331663) */
614  idx = (pwr * gain_mul - 0.0295) * 70.570526123;
615  if (idx > 127) { // fall back if index falls outside table range
616  coeffs[n] = wmavoice_energy_table[127] *
617  powf(1.0331663, idx - 127);
618  } else
619  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
620  }
621 
622  /* calculate the Hilbert transform of the gains, which we do (since this
623  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
624  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
625  * "moment" of the LPCs in this filter. */
626  s->dct.dct_calc(&s->dct, lpcs);
627  s->dst.dct_calc(&s->dst, lpcs);
628 
629  /* Split out the coefficient indexes into phase/magnitude pairs */
  /* Every index is clipped to [-255, 255] and offset by 255, so it always
   * falls inside the 511-entry s->sin[] / s->cos[] tables. */
630  idx = 255 + av_clip(lpcs[64], -255, 255);
631  coeffs[0] = coeffs[0] * s->cos[idx];
632  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
633  last_coeff = coeffs[64] * s->cos[idx];
  /* The loop handles two values of n per iteration (n is decremented both
   * in the body and in the for-header); the two halves differ only in the
   * sign applied to lpcs[64]. */
634  for (n = 63;; n--) {
635  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
636  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
637  coeffs[n * 2] = coeffs[n] * s->cos[idx];
638 
639  if (!--n) break;
640 
641  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
642  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
643  coeffs[n * 2] = coeffs[n] * s->cos[idx];
644  }
645  coeffs[1] = last_coeff;
646 
647  /* move into real domain */
648  s->irdft.rdft_calc(&s->irdft, coeffs);
649 
650  /* tilt correction and normalize scale */
651  memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
652  if (s->denoise_tilt_corr) {
653  float tilt_mem = 0;
654 
655  coeffs[remainder - 1] = 0;
656  ff_tilt_compensation(&tilt_mem,
657  -1.8 * tilt_factor(coeffs, remainder - 1),
658  coeffs, remainder);
659  }
  /* NOTE(review): if all kept coefficients are zero the scalar product is 0
   * and 1 / 0 is evaluated here - confirm this cannot happen in practice. */
660  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
661  remainder));
662  for (n = 0; n < remainder; n++)
663  coeffs[n] *= sq;
664 }
665 
/**
 * Apply the denoise filter to one block of postfilter output and maintain
 * the overlap cache shared between consecutive blocks.
 *
 * For non-silence blocks the filter coefficients are derived from the
 * (tilt-compensated) LPCs via calc_input_response() and applied to synth_pf
 * by multiplication in the RDFT domain. The filter tail that extends past
 * `size` is stored in s->denoise_filter_cache and added to the start of
 * later blocks.
 *
 * @param s        decoder context (tilted_lpcs_pf, denoise_coeffs_pf, rdft,
 *                 irdft, lsps and the denoise filter cache are used)
 * @param fcb_type block type; FCB_TYPE_SILENCE skips the filtering and only
 *                 merges the cached tail
 * @param synth_pf in/out samples; written up to index 127 when filtering
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPCs of this block (s->lsps entries are read)
 */
692 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
693  float *synth_pf, int size,
694  const float *lpcs)
695 {
696  int remainder, lim, n;
697 
698  if (fcb_type != FCB_TYPE_SILENCE) {
699  float *tilted_lpcs = s->tilted_lpcs_pf,
700  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
701 
  /* Build a 128-entry, zero-padded vector { 1, lpcs[0..lsps-1], 0, ... }
   * and tilt-compensate its first lsps + 2 entries. */
702  tilted_lpcs[0] = 1.0;
703  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
704  memset(&tilted_lpcs[s->lsps + 1], 0,
705  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
706  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
707  tilted_lpcs, s->lsps + 2);
708 
709  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
710  * size is applied to the next frame. All input beyond this is zero,
711  * and thus all output beyond this will go towards zero, hence we can
712  * limit to min(size-1, 127-size) as a performance consideration. */
713  remainder = FFMIN(127 - size, size - 1);
714  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
715 
716  /* apply coefficients (in frequency spectrum domain), i.e. complex
717  * number multiplication */
718  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
719  s->rdft.rdft_calc(&s->rdft, synth_pf);
720  s->rdft.rdft_calc(&s->rdft, coeffs);
  /* Slots 0 and 1 are multiplied as plain real values; slots 2n / 2n+1 are
   * treated as the real / imaginary part of one complex bin. */
721  synth_pf[0] *= coeffs[0];
722  synth_pf[1] *= coeffs[1];
723  for (n = 1; n < 64; n++) {
724  float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
725  synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
726  synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
727  }
728  s->irdft.rdft_calc(&s->irdft, synth_pf);
729  }
730 
731  /* merge filter output with the history of previous runs */
732  if (s->denoise_filter_cache_size) {
733  lim = FFMIN(s->denoise_filter_cache_size, size);
734  for (n = 0; n < lim; n++)
735  synth_pf[n] += s->denoise_filter_cache[n];
736  s->denoise_filter_cache_size -= lim;
  /* NOTE(review): the remaining argument(s) of this memmove() call are not
   * visible in this listing (original line 738 is absent) - confirm the
   * byte count against the upstream file. */
737  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
739  }
740 
741  /* move remainder of filter output into a cache for future runs */
  /* `remainder` is only assigned in the non-silence branch above, which is
   * guarded by the same condition as this one. */
742  if (fcb_type != FCB_TYPE_SILENCE) {
743  lim = FFMIN(remainder, s->denoise_filter_cache_size);
744  for (n = 0; n < lim; n++)
745  s->denoise_filter_cache[n] += synth_pf[size + n];
746  if (lim < remainder) {
747  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
748  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
749  s->denoise_filter_cache_size = remainder;
750  }
751  }
752 }
753 
/**
 * Run the postfilter chain on one block of synthesized speech.
 *
 * Visible stages: derive an excitation from the speech with the LP zero
 * synthesis filter, optionally smooth it (kalman_smoothen), re-synthesize
 * with the LP synthesis filter, denoise (wiener_denoise), apply adaptive
 * gain control into `samples`, and - when s->dc_level > 8 - a final DC
 * removal step.
 *
 * @param s           decoder context
 * @param synth       input: synthesized speech for this block
 * @param samples     output: postfiltered samples (size entries)
 * @param size        block size; must not exceed MAX_FRAMESIZE / 2
 * @param lpcs        LPCs for this block
 * @param zero_exc_pf receives the derived excitation; also read as history
 *                    by kalman_smoothen()
 * @param fcb_type    block type; smoothing is only attempted for types
 *                    >= FCB_TYPE_AW_PULSES
 * @param pitch       pitch value passed on to kalman_smoothen()
 */
774 static void postfilter(WMAVoiceContext *s, const float *synth,
775  float *samples, int size,
776  const float *lpcs, float *zero_exc_pf,
777  int fcb_type, int pitch)
778 {
779  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
780  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
781  *synth_filter_in = zero_exc_pf;
782 
783  assert(size <= MAX_FRAMESIZE / 2);
784 
785  /* generate excitation from input signal */
786  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
787 
  /* Use the smoothed excitation only if kalman_smoothen() succeeded
   * (returned 0); otherwise keep the unsmoothed zero_exc_pf. */
788  if (fcb_type >= FCB_TYPE_AW_PULSES &&
789  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
790  synth_filter_in = synth_filter_in_buf;
791 
792  /* re-synthesize speech after smoothening, and keep history */
793  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
794  synth_filter_in, size, s->lsps);
  /* Copy the last s->lsps outputs to just before synth_pf[0], i.e. into the
   * MAX_LSPS_ALIGN16 floats reserved at the start of synth_filter_out_buf. */
795  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
796  sizeof(synth_pf[0]) * s->lsps);
797 
798  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
799 
800  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
801  &s->postfilter_agc);
802 
803  if (s->dc_level > 8) {
804  /* remove ultra-low frequency DC noise / highpass filter;
805  * coefficients are identical to those used in SIPR decoding,
806  * and very closely resemble those used in AMR-NB decoding. */
  /* NOTE(review): the first line of the filter call that takes the
   * arguments below is not visible in this listing (original line 807 is
   * absent) - confirm against the upstream file. */
808  (const float[2]) { -1.99997, 1.0 },
809  (const float[2]) { -1.9330735188, 0.93589198496 },
810  0.93980580475, s->dcf_mem, size);
811  }
812 }
/**
 * Multi-stage vector dequantizer for LSPs.
 *
 * The result is the sum, over all stages, of base_q[stage] +
 * mul_q[stage] * entry, where `entry` is taken from the codebook row
 * selected by values[stage]. The codebooks of all stages are stored back to
 * back in `table`; stage k holds sizes[k] rows of `num` bytes each.
 *
 * @param lsps     output vector, `num` entries (overwritten)
 * @param num      vector length
 * @param values   codebook row index per stage
 * @param sizes    number of rows in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated 8-bit codebooks
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, sizeof(lsps[0]) * num);

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row  = &codebook[values[stage] * num];
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* skip over this stage's codebook */
        codebook += sizes[stage] * num;
    }
}
848 
860 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
861 {
862  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
863  static const double mul_lsf[4] = {
864  5.2187144800e-3, 1.4626986422e-3,
865  9.6179549166e-4, 1.1325736225e-3
866  };
867  static const double base_lsf[4] = {
868  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
869  M_PI * -3.3486e-2, M_PI * -5.7408e-2
870  };
871  uint16_t v[4];
872 
873  v[0] = get_bits(gb, 8);
874  v[1] = get_bits(gb, 6);
875  v[2] = get_bits(gb, 5);
876  v[3] = get_bits(gb, 5);
877 
878  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
879  mul_lsf, base_lsf);
880 }
881 
887  double *i_lsps, const double *old,
888  double *a1, double *a2, int q_mode)
889 {
890  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
891  static const double mul_lsf[3] = {
892  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
893  };
894  static const double base_lsf[3] = {
895  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
896  };
897  const float (*ipol_tab)[2][10] = q_mode ?
899  uint16_t interpol, v[3];
900  int n;
901 
902  dequant_lsp10i(gb, i_lsps);
903 
904  interpol = get_bits(gb, 5);
905  v[0] = get_bits(gb, 7);
906  v[1] = get_bits(gb, 6);
907  v[2] = get_bits(gb, 6);
908 
909  for (n = 0; n < 10; n++) {
910  double delta = old[n] - i_lsps[n];
911  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
912  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
913  }
914 
915  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
916  mul_lsf, base_lsf);
917 }
918 
922 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
923 {
924  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
925  static const double mul_lsf[5] = {
926  3.3439586280e-3, 6.9908173703e-4,
927  3.3216608306e-3, 1.0334960326e-3,
928  3.1899104283e-3
929  };
930  static const double base_lsf[5] = {
931  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
932  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
933  M_PI * -1.29816e-1
934  };
935  uint16_t v[5];
936 
937  v[0] = get_bits(gb, 8);
938  v[1] = get_bits(gb, 6);
939  v[2] = get_bits(gb, 7);
940  v[3] = get_bits(gb, 6);
941  v[4] = get_bits(gb, 7);
942 
943  dequant_lsps( lsps, 5, v, vec_sizes, 2,
944  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
945  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
946  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
947  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
948  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
949 }
950 
956  double *i_lsps, const double *old,
957  double *a1, double *a2, int q_mode)
958 {
959  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
960  static const double mul_lsf[3] = {
961  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
962  };
963  static const double base_lsf[3] = {
964  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
965  };
966  const float (*ipol_tab)[2][16] = q_mode ?
968  uint16_t interpol, v[3];
969  int n;
970 
971  dequant_lsp16i(gb, i_lsps);
972 
973  interpol = get_bits(gb, 5);
974  v[0] = get_bits(gb, 7);
975  v[1] = get_bits(gb, 7);
976  v[2] = get_bits(gb, 7);
977 
978  for (n = 0; n < 16; n++) {
979  double delta = old[n] - i_lsps[n];
980  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
981  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
982  }
983 
984  dequant_lsps( a2, 10, v, vec_sizes, 1,
985  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
986  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
987  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
988  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
989  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
990 }
991 
1006  const int *pitch)
1007 {
1008  static const int16_t start_offset[94] = {
1009  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1010  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1011  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1012  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1013  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1014  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1015  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1016  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1017  };
1018  int bits, offset;
1019 
1020  /* position of pulse */
1021  s->aw_idx_is_ext = 0;
1022  if ((bits = get_bits(gb, 6)) >= 54) {
1023  s->aw_idx_is_ext = 1;
1024  bits += (bits - 54) * 3 + get_bits(gb, 2);
1025  }
1026 
1027  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1028  * the distribution of the pulses in each block contained in this frame. */
1029  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1030  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1031  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1032  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1033  offset += s->aw_n_pulses[0] * pitch[0];
1034  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1035  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1036 
1037  /* if continuing from a position before the block, reset position to
1038  * start of block (when corrected for the range over which it can be
1039  * spread in aw_pulse_set1()). */
1040  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1041  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1042  s->aw_first_pulse_off[1] -= pitch[1];
1043  if (start_offset[bits] < 0)
1044  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1045  s->aw_first_pulse_off[0] -= pitch[0];
1046  }
1047 }
1048 
1058  int block_idx, AMRFixed *fcb)
1059 {
1060  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1061  uint16_t *use_mask = use_mask_mem + 2;
1062  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1063  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1064  * of idx are the position of the bit within a particular item in the
1065  * array (0 being the most significant bit, and 15 being the least
1066  * significant bit), and the remainder (>> 4) is the index in the
1067  * use_mask[]-array. This is faster and uses less memory than using a
1068  * 80-byte/80-int array. */
1069  int pulse_off = s->aw_first_pulse_off[block_idx],
1070  pulse_start, n, idx, range, aidx, start_off = 0;
1071 
1072  /* set offset of first pulse to within this block */
1073  if (s->aw_n_pulses[block_idx] > 0)
1074  while (pulse_off + s->aw_pulse_range < 1)
1075  pulse_off += fcb->pitch_lag;
1076 
1077  /* find range per pulse */
1078  if (s->aw_n_pulses[0] > 0) {
1079  if (block_idx == 0) {
1080  range = 32;
1081  } else /* block_idx = 1 */ {
1082  range = 8;
1083  if (s->aw_n_pulses[block_idx] > 0)
1084  pulse_off = s->aw_next_pulse_off_cache;
1085  }
1086  } else
1087  range = 16;
1088  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1089 
1090  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1091  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1092  * we exclude that range from being pulsed again in this function. */
1093  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1094  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1095  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1096  if (s->aw_n_pulses[block_idx] > 0)
1097  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1098  int excl_range = s->aw_pulse_range; // always 16 or 24
1099  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1100  int first_sh = 16 - (idx & 15);
1101  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1102  excl_range -= first_sh;
1103  if (excl_range >= 16) {
1104  *use_mask_ptr++ = 0;
1105  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1106  } else
1107  *use_mask_ptr &= 0xFFFF >> excl_range;
1108  }
1109 
1110  /* find the 'aidx'th offset that is not excluded */
1111  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1112  for (n = 0; n <= aidx; pulse_start++) {
1113  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1114  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1115  if (use_mask[0]) idx = 0x0F;
1116  else if (use_mask[1]) idx = 0x1F;
1117  else if (use_mask[2]) idx = 0x2F;
1118  else if (use_mask[3]) idx = 0x3F;
1119  else if (use_mask[4]) idx = 0x4F;
1120  else return -1;
1121  idx -= av_log2_16bit(use_mask[idx >> 4]);
1122  }
1123  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1124  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1125  n++;
1126  start_off = idx;
1127  }
1128  }
1129 
1130  fcb->x[fcb->n] = start_off;
1131  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1132  fcb->n++;
1133 
1134  /* set offset for next block, relative to start of that block */
1135  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1136  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1137  return 0;
1138 }
1139 
1148  int block_idx, AMRFixed *fcb)
1149 {
1150  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1151  float v;
1152 
1153  if (s->aw_n_pulses[block_idx] > 0) {
1154  int n, v_mask, i_mask, sh, n_pulses;
1155 
1156  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1157  n_pulses = 3;
1158  v_mask = 8;
1159  i_mask = 7;
1160  sh = 4;
1161  } else { // 4 pulses, 1:sign + 2:index each
1162  n_pulses = 4;
1163  v_mask = 4;
1164  i_mask = 3;
1165  sh = 3;
1166  }
1167 
1168  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1169  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1170  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1171  s->aw_first_pulse_off[block_idx];
1172  while (fcb->x[fcb->n] < 0)
1173  fcb->x[fcb->n] += fcb->pitch_lag;
1174  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1175  fcb->n++;
1176  }
1177  } else {
1178  int num2 = (val & 0x1FF) >> 1, delta, idx;
1179 
1180  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1181  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1182  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1183  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1184  v = (val & 0x200) ? -1.0 : 1.0;
1185 
1186  fcb->no_repeat_mask |= 3 << fcb->n;
1187  fcb->x[fcb->n] = idx - delta;
1188  fcb->y[fcb->n] = v;
1189  fcb->x[fcb->n + 1] = idx;
1190  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1191  fcb->n += 2;
1192  }
1193 }
1194 
/**
 * Deterministic pseudo-random offset generator.
 *
 * Combines the frame counter and block number into a seed and maps it to an
 * offset in the range [0, 1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block inside the frame
 * @param block_size size of the block
 * @return offset smaller than 1000 - block_size
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* Lookup table replacing the division in
     *   rem    = seed % 9;
     *   y      = rem * 5 + 6;
     *   scaled = (49995 * seed) / y;
     * y can only take 9 different values, so scaled is rewritten as
     *   scaled = seed * (49995 / y) + seed * ((49995 % y) / y)
     * Each row belongs to one value of y: the first entry is 49995 / y,
     * the second one is the FASTDIV-style form of (49995 % y) / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 * 93368855U  }  // y = 46
    };
    unsigned int seed, rem, scaled;

    seed = MUL16(block_num, 1877) + frame_cntr;
    /* the largest seed is 8*1877+0xFFFE=0x13AA6, so one subtraction is
     * effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    rem    = seed - 9 * MULH(477218589, seed); // seed % 9
    /* scaled = seed * 49995 / (rem * 5 + 6), truncated to 16 bits */
    scaled = (uint16_t) (seed * div_tbl[rem][0] +
                         UMULH(seed, div_tbl[rem][1]));

    return scaled % (1000 - block_size);
}
1239 
1245  int block_idx, int size,
1246  const struct frame_type_desc *frame_desc,
1247  float *excitation)
1248 {
1249  float gain;
1250  int n, r_idx;
1251 
1252  assert(size <= MAX_FRAMESIZE);
1253 
1254  /* Set the offset from which we start reading wmavoice_std_codebook */
1255  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1256  r_idx = pRNG(s->frame_cntr, block_idx, size);
1257  gain = s->silence_gain;
1258  } else /* FCB_TYPE_HARDCODED */ {
1259  r_idx = get_bits(gb, 8);
1260  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1261  }
1262 
1263  /* Clear gain prediction parameters */
1264  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1265 
1266  /* Apply gain to hardcoded codebook and use that as excitation signal */
1267  for (n = 0; n < size; n++)
1268  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1269 }
1270 
1276  int block_idx, int size,
1277  int block_pitch_sh2,
1278  const struct frame_type_desc *frame_desc,
1279  float *excitation)
1280 {
1281  static const float gain_coeff[6] = {
1282  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1283  };
1284  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1285  int n, idx, gain_weight;
1286  AMRFixed fcb;
1287 
1288  assert(size <= MAX_FRAMESIZE / 2);
1289  memset(pulses, 0, sizeof(*pulses) * size);
1290 
1291  fcb.pitch_lag = block_pitch_sh2 >> 2;
1292  fcb.pitch_fac = 1.0;
1293  fcb.no_repeat_mask = 0;
1294  fcb.n = 0;
1295 
1296  /* For the other frame types, this is where we apply the innovation
1297  * (fixed) codebook pulses of the speech signal. */
1298  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1299  aw_pulse_set1(s, gb, block_idx, &fcb);
1300  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1301  /* Conceal the block with silence and return.
1302  * Skip the correct amount of bits to read the next
1303  * block from the correct offset. */
1304  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1305 
1306  for (n = 0; n < size; n++)
1307  excitation[n] =
1308  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1309  skip_bits(gb, 7 + 1);
1310  return;
1311  }
1312  } else /* FCB_TYPE_EXC_PULSES */ {
1313  int offset_nbits = 5 - frame_desc->log_n_blocks;
1314 
1315  fcb.no_repeat_mask = -1;
1316  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1317  * (instead of double) for a subset of pulses */
1318  for (n = 0; n < 5; n++) {
1319  float sign;
1320  int pos1, pos2;
1321 
1322  sign = get_bits1(gb) ? 1.0 : -1.0;
1323  pos1 = get_bits(gb, offset_nbits);
1324  fcb.x[fcb.n] = n + 5 * pos1;
1325  fcb.y[fcb.n++] = sign;
1326  if (n < frame_desc->dbl_pulses) {
1327  pos2 = get_bits(gb, offset_nbits);
1328  fcb.x[fcb.n] = n + 5 * pos2;
1329  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1330  }
1331  }
1332  }
1333  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1334 
1335  /* Calculate gain for adaptive & fixed codebook signal.
1336  * see ff_amr_set_fixed_gain(). */
1337  idx = get_bits(gb, 7);
1339  gain_coeff, 6) -
1340  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1341  acb_gain = wmavoice_gain_codebook_acb[idx];
1342  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1343  -2.9957322736 /* log(0.05) */,
1344  1.6094379124 /* log(5.0) */);
1345 
1346  gain_weight = 8 >> frame_desc->log_n_blocks;
1347  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1348  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1349  for (n = 0; n < gain_weight; n++)
1350  s->gain_pred_err[n] = pred_err;
1351 
1352  /* Calculation of adaptive codebook */
1353  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1354  int len;
1355  for (n = 0; n < size; n += len) {
1356  int next_idx_sh16;
1357  int abs_idx = block_idx * size + n;
1358  int pitch_sh16 = (s->last_pitch_val << 16) +
1359  s->pitch_diff_sh16 * abs_idx;
1360  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1361  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1362  idx = idx_sh16 >> 16;
1363  if (s->pitch_diff_sh16) {
1364  if (s->pitch_diff_sh16 > 0) {
1365  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1366  } else
1367  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1368  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1369  1, size - n);
1370  } else
1371  len = size;
1372 
1373  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1375  idx, 9, len);
1376  }
1377  } else /* ACB_TYPE_HAMMING */ {
1378  int block_pitch = block_pitch_sh2 >> 2;
1379  idx = block_pitch_sh2 & 3;
1380  if (idx) {
1381  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1383  idx, 8, size);
1384  } else
1385  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1386  sizeof(float) * size);
1387  }
1388 
1389  /* Interpolate ACB/FCB and use as excitation signal */
1390  ff_weighted_vector_sumf(excitation, excitation, pulses,
1391  acb_gain, fcb_gain, size);
1392 }
1393 
1411  int block_idx, int size,
1412  int block_pitch_sh2,
1413  const double *lsps, const double *prev_lsps,
1414  const struct frame_type_desc *frame_desc,
1415  float *excitation, float *synth)
1416 {
1417  double i_lsps[MAX_LSPS];
1418  float lpcs[MAX_LSPS];
1419  float fac;
1420  int n;
1421 
1422  if (frame_desc->acb_type == ACB_TYPE_NONE)
1423  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1424  else
1425  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1426  frame_desc, excitation);
1427 
1428  /* convert interpolated LSPs to LPCs */
1429  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1430  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1431  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1432  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1433 
1434  /* Speech synthesis */
1435  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1436 }
1437 
/**
 * Synthesize the output samples of a single frame.
 *
 * Reads the frame-type VLC, derives the per-block pitch values, calls
 * synth_block() once per block of the frame and finally either runs the
 * postfilter (when s->do_apf is set) or copies the synthesized signal
 * unchanged into the output buffer. Some decoder state is cached in the
 * context for the next frame before returning.
 *
 * NOTE(review): this listing is missing several source lines (the embedded
 * line numbering skips 1505, 1532, 1535 and 1537). The statement under
 * "case FCB_TYPE_SILENCE:" and parts of the block-pitch delta / av_clip()
 * expressions are therefore not visible here.
 *
 * @param ctx        codec context; ctx->priv_data is the WMAVoiceContext
 * @param gb         bit reader; the first thing read is the frame-type VLC
 * @param frame_idx  index of this frame, used to offset into s->zero_exc_pf
 * @param samples    output buffer; 160 samples are written
 * @param lsps       line spectral values of this frame (cos() is applied
 *                   before they are converted to LPCs)
 * @param prev_lsps  line spectral values that lsps is averaged against
 * @param excitation excitation buffer, handed block-wise to synth_block()
 * @param synth      synthesis buffer, handed block-wise to synth_block()
 * @return 0 on success, AVERROR_INVALIDDATA if the frame-type code is invalid
 */
1453 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1454  float *samples,
1455  const double *lsps, const double *prev_lsps,
1456  float *excitation, float *synth)
1457 {
1458  WMAVoiceContext *s = ctx->priv_data;
1459  int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
1460  int pitch[MAX_BLOCKS], last_block_pitch;
1461 
1462  /* Parse frame type ("frame header"), see frame_descs */
1463  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
1464 
1465  if (bd_idx < 0) {
1466  av_log(ctx, AV_LOG_ERROR,
1467  "Invalid frame type VLC code, skipping\n");
1468  return AVERROR_INVALIDDATA;
1469  }
1470 
/* A frame always spans MAX_FRAMESIZE samples, split evenly over its blocks. */
1471  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1472 
1473  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1474  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1475  /* Pitch is provided per frame, which is interpreted as the pitch of
1476  * the last sample of the last block of this frame. We can interpolate
1477  * the pitch of other blocks (and even pitch-per-sample) by gradually
1478  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1479  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1480  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1481  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1482  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
/* Do not interpolate from the previous pitch if the previous frame had no
 * adaptive codebook or if the pitch changed too much; start from the
 * current value instead. */
1483  if (s->last_acb_type == ACB_TYPE_NONE ||
1484  20 * abs(cur_pitch_val - s->last_pitch_val) >
1485  (cur_pitch_val + s->last_pitch_val))
1486  s->last_pitch_val = cur_pitch_val;
1487 
1488  /* pitch per block */
1489  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1490  int fac = n * 2 + 1;
1491 
1492  pitch[n] = (MUL16(fac, cur_pitch_val) +
1493  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1494  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1495  }
1496 
1497  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1498  s->pitch_diff_sh16 =
1499  ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
1500  }
1501 
1502  /* Global gain (if silence) and pitch-adaptive window coordinates */
/* NOTE(review): the statement belonging to FCB_TYPE_SILENCE (source line
 * 1505) is missing from this listing. */
1503  switch (frame_descs[bd_idx].fcb_type) {
1504  case FCB_TYPE_SILENCE:
1506  break;
1507  case FCB_TYPE_AW_PULSES:
1508  aw_parse_coords(s, gb, pitch);
1509  break;
1510  }
1511 
1512  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1513  int bl_pitch_sh2;
1514 
1515  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1516  switch (frame_descs[bd_idx].acb_type) {
1517  case ACB_TYPE_HAMMING: {
1518  /* Pitch is given per block. Per-block pitches are encoded as an
1519  * absolute value for the first block, and then delta values
1520  * (relative to this value) for all subsequent blocks. The scale of
1521  * this pitch value is semi-logarithmic compared to its use in the
1522  * decoder, so we convert it to normal scale also. */
1523  int block_pitch,
1524  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1525  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1526  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1527 
/* NOTE(review): source lines 1532, 1535 and 1537 are missing from this
 * listing, so the delta expression and the av_clip() call below are
 * incomplete as shown. */
1528  if (n == 0) {
1529  block_pitch = get_bits(gb, s->block_pitch_nbits);
1530  } else
1531  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1533  /* Convert last_ so that any next delta is within _range */
1534  last_block_pitch = av_clip(block_pitch,
1536  s->block_pitch_range -
1538 
1539  /* Convert semi-log-style scale back to normal scale */
1540  if (block_pitch < t1) {
1541  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1542  } else {
1543  block_pitch -= t1;
1544  if (block_pitch < t2) {
1545  bl_pitch_sh2 =
1546  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1547  } else {
1548  block_pitch -= t2;
1549  if (block_pitch < t3) {
1550  bl_pitch_sh2 =
1551  (s->block_conv_table[2] + block_pitch) << 2;
1552  } else
1553  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1554  }
1555  }
1556  pitch[n] = bl_pitch_sh2 >> 2;
1557  break;
1558  }
1559 
1560  case ACB_TYPE_ASYMMETRIC: {
1561  bl_pitch_sh2 = pitch[n] << 2;
1562  break;
1563  }
1564 
1565  default: // ACB_TYPE_NONE has no pitch
1566  bl_pitch_sh2 = 0;
1567  break;
1568  }
1569 
1570  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1571  lsps, prev_lsps, &frame_descs[bd_idx],
1572  &excitation[n * block_nsamples],
1573  &synth[n * block_nsamples]);
1574  }
1575 
1576  /* Averaging projection filter, if applicable. Else, just copy samples
1577  * from synthesis buffer */
/* The postfilter runs on two halves of 80 samples each: the first half uses
 * LPCs derived from the mean of prev_lsps and lsps, the second half uses
 * LPCs derived from lsps alone.
 * NOTE(review): pitch[] is only written above for ACB_TYPE_ASYMMETRIC and
 * ACB_TYPE_HAMMING frames; for ACB_TYPE_NONE frames the pitch[0] passed to
 * postfilter() looks uninitialized - confirm that postfilter() does not
 * read it in that case. */
1578  if (s->do_apf) {
1579  double i_lsps[MAX_LSPS];
1580  float lpcs[MAX_LSPS];
1581 
1582  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1583  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1584  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1585  postfilter(s, synth, samples, 80, lpcs,
1586  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1587  frame_descs[bd_idx].fcb_type, pitch[0]);
1588 
1589  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1590  i_lsps[n] = cos(lsps[n]);
1591  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1592  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1593  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1594  frame_descs[bd_idx].fcb_type, pitch[0]);
1595  } else
1596  memcpy(samples, synth, 160 * sizeof(synth[0]));
1597 
1598  /* Cache values for next frame */
1599  s->frame_cntr++;
1600  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1601  s->last_acb_type = frame_descs[bd_idx].acb_type;
1602  switch (frame_descs[bd_idx].acb_type) {
1603  case ACB_TYPE_NONE:
1604  s->last_pitch_val = 0;
1605  break;
1606  case ACB_TYPE_ASYMMETRIC:
1607  s->last_pitch_val = cur_pitch_val;
1608  break;
1609  case ACB_TYPE_HAMMING:
1610  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1611  break;
1612  }
1613 
1614  return 0;
1615 }
1616 
1629 static void stabilize_lsps(double *lsps, int num)
1630 {
1631  int n, m, l;
1632 
1633  /* set minimum value for first, maximum value for last and minimum
1634  * spacing between LSF values.
1635  * Very similar to ff_set_min_dist_lsf(), but in double. */
1636  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1637  for (n = 1; n < num; n++)
1638  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1639  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1640 
1641  /* reorder (looks like one-time / non-recursed bubblesort).
1642  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1643  for (n = 1; n < num; n++) {
1644  if (lsps[n] < lsps[n - 1]) {
1645  for (m = 1; m < num; m++) {
1646  double tmp = lsps[m];
1647  for (l = m - 1; l >= 0; l--) {
1648  if (lsps[l] <= tmp) break;
1649  lsps[l + 1] = lsps[l];
1650  }
1651  lsps[l + 1] = tmp;
1652  }
1653  break;
1654  }
1655  }
1656 }
1657 
1668  WMAVoiceContext *s)
1669 {
1670  GetBitContext s_gb, *gb = &s_gb;
1671  int n, need_bits, bd_idx;
1672  const struct frame_type_desc *frame_desc;
1673 
1674  /* initialize a copy */
1675  init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
1676  skip_bits_long(gb, get_bits_count(orig_gb));
1677  assert(get_bits_left(gb) == get_bits_left(orig_gb));
1678 
1679  /* superframe header */
1680  if (get_bits_left(gb) < 14)
1681  return 1;
1682  if (!get_bits1(gb))
1683  return AVERROR(ENOSYS); // WMAPro-in-WMAVoice superframe
1684  if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe
1685  if (s->has_residual_lsps) { // residual LSPs (for all frames)
1686  if (get_bits_left(gb) < s->sframe_lsp_bitsize)
1687  return 1;
1689  }
1690 
1691  /* frames */
1692  for (n = 0; n < MAX_FRAMES; n++) {
1693  int aw_idx_is_ext = 0;
1694 
1695  if (!s->has_residual_lsps) { // independent LSPs (per-frame)
1696  if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
1698  }
1699  bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
1700  if (bd_idx < 0)
1701  return AVERROR_INVALIDDATA; // invalid frame type VLC code
1702  frame_desc = &frame_descs[bd_idx];
1703  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1704  if (get_bits_left(gb) < s->pitch_nbits)
1705  return 1;
1706  skip_bits_long(gb, s->pitch_nbits);
1707  }
1708  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1709  skip_bits(gb, 8);
1710  } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1711  int tmp = get_bits(gb, 6);
1712  if (tmp >= 0x36) {
1713  skip_bits(gb, 2);
1714  aw_idx_is_ext = 1;
1715  }
1716  }
1717 
1718  /* blocks */
1719  if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
1720  need_bits = s->block_pitch_nbits +
1721  (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
1722  } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1723  need_bits = 2 * !aw_idx_is_ext;
1724  } else
1725  need_bits = 0;
1726  need_bits += frame_desc->frame_size;
1727  if (get_bits_left(gb) < need_bits)
1728  return 1;
1729  skip_bits_long(gb, need_bits);
1730  }
1731 
1732  return 0;
1733 }
1734 
/**
 * Synthesize the output samples of one superframe.
 *
 * Verifies via check_bits_for_superframe() that enough bits are available,
 * parses the superframe header and (if s->has_residual_lsps) the shared
 * LSPs, obtains an output buffer, calls synth_frame() for each of the three
 * frames and finally stores LSP, synthesis and excitation history in the
 * context for the next call.
 *
 * NOTE(review): this listing is missing source lines 1760 and 1772 (the
 * embedded numbering skips them): the second half of the mean_lsf
 * initializer and the statement that sets up the local bit reader when
 * cached superframe data exists are not visible here.
 *
 * @param ctx           codec context; ctx->priv_data is the WMAVoiceContext
 * @param frame         output frame; its buffer is obtained through
 *                      ff_get_buffer() and nb_samples is set to the number
 *                      of samples in this superframe
 * @param got_frame_ptr set to 1 when a superframe was decoded, and to 0 when
 *                      not enough bits were available or synth_frame()
 *                      failed; the other error returns leave it untouched
 * @return 0 on success, 1 if check_bits_for_superframe() reported too few
 *         bits, or the error code of the failing step
 */
1752 static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
1753  int *got_frame_ptr)
1754 {
1755  WMAVoiceContext *s = ctx->priv_data;
1756  GetBitContext *gb = &s->gb, s_gb;
1757  int n, res, n_samples = 480;
1758  double lsps[MAX_FRAMES][MAX_LSPS];
1759  const double *mean_lsf = s->lsps == 16 ?
1761  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1762  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1763  float *samples;
1764 
/* Prime the working buffers with the history kept from the previous call. */
1765  memcpy(synth, s->synth_history,
1766  s->lsps * sizeof(*synth));
1767  memcpy(excitation, s->excitation_history,
1768  s->history_nsamples * sizeof(*excitation));
1769 
/* If cached superframe data exists, read from the local reader s_gb instead
 * of s->gb (the statement initializing s_gb is missing from this listing). */
1770  if (s->sframe_cache_size > 0) {
1771  gb = &s_gb;
1773  s->sframe_cache_size = 0;
1774  }
1775 
1776  if ((res = check_bits_for_superframe(gb, s)) == 1) {
1777  *got_frame_ptr = 0;
1778  return 1;
1779  } else if (res < 0)
1780  return res;
1781 
1782  /* First bit is speech/music bit, it differentiates between WMAVoice
1783  * speech samples (the actual codec) and WMAVoice music samples, which
1784  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1785  * the wild yet. */
1786  if (!get_bits1(gb)) {
1787  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1788  return AVERROR_PATCHWELCOME;
1789  }
1790 
1791  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1792  if (get_bits1(gb)) {
1793  if ((n_samples = get_bits(gb, 12)) > 480) {
1794  av_log(ctx, AV_LOG_ERROR,
1795  "Superframe encodes >480 samples (%d), not allowed\n",
1796  n_samples);
1797  return AVERROR_INVALIDDATA;
1798  }
1799  }
1800  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1801  if (s->has_residual_lsps) {
1802  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1803 
1804  for (n = 0; n < s->lsps; n++)
1805  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1806 
1807  if (s->lsps == 10) {
1808  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1809  } else /* s->lsps == 16 */
1810  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1811 
/* The last frame's values come straight from the dequantizer; the first
 * two frames are formed from the a1/a2 arrays it filled in. */
1812  for (n = 0; n < s->lsps; n++) {
1813  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1814  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1815  lsps[2][n] += mean_lsf[n];
1816  }
1817  for (n = 0; n < 3; n++)
1818  stabilize_lsps(lsps[n], s->lsps);
1819  }
1820 
1821  /* get output buffer */
/* A full 480-sample buffer is requested; nb_samples is lowered to the
 * actual sample count only after the allocation. */
1822  frame->nb_samples = 480;
1823  if ((res = ff_get_buffer(ctx, frame, 0)) < 0) {
1824  av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
1825  return res;
1826  }
1827  frame->nb_samples = n_samples;
1828  samples = (float *)frame->data[0];
1829 
1830  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1831  for (n = 0; n < 3; n++) {
1832  if (!s->has_residual_lsps) {
1833  int m;
1834 
1835  if (s->lsps == 10) {
1836  dequant_lsp10i(gb, lsps[n]);
1837  } else /* s->lsps == 16 */
1838  dequant_lsp16i(gb, lsps[n]);
1839 
1840  for (m = 0; m < s->lsps; m++)
1841  lsps[n][m] += mean_lsf[m];
1842  stabilize_lsps(lsps[n], s->lsps);
1843  }
1844 
/* The first frame is paired with the LSPs stored in the context, later
 * frames with the LSPs of the frame before them. */
1845  if ((res = synth_frame(ctx, gb, n,
1846  &samples[n * MAX_FRAMESIZE],
1847  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1848  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1849  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1850  *got_frame_ptr = 0;
1851  return res;
1852  }
1853  }
1854 
1855  /* Statistics? FIXME - we don't check for length, a slight overrun
1856  * will be caught by internal buffer padding, and anything else
1857  * will be skipped, not read. */
1858  if (get_bits1(gb)) {
1859  res = get_bits(gb, 4);
1860  skip_bits(gb, 10 * (res + 1));
1861  }
1862 
1863  *got_frame_ptr = 1;
1864 
1865  /* Update history */
/* Keep the last frame's LSPs and the tail of each working buffer so the
 * next superframe can continue from them. */
1866  memcpy(s->prev_lsps, lsps[2],
1867  s->lsps * sizeof(*s->prev_lsps));
1868  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1869  s->lsps * sizeof(*synth));
1870  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1871  s->history_nsamples * sizeof(*excitation));
1872  if (s->do_apf)
1873  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1874  s->history_nsamples * sizeof(*s->zero_exc_pf));
1875 
1876  return 0;
1877 }
1878 
1887 {
1888  GetBitContext *gb = &s->gb;
1889  unsigned int res;
1890 
1891  if (get_bits_left(gb) < 11)
1892  return 1;
1893  skip_bits(gb, 4); // packet sequence number
1894  s->has_residual_lsps = get_bits1(gb);
1895  do {
1896  res = get_bits(gb, 6); // number of superframes per packet
1897  // (minus first one if there is spillover)
1898  if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
1899  return 1;
1900  } while (res == 0x3F);
1902 
1903  return 0;
1904 }
1905 
1921 static void copy_bits(PutBitContext *pb,
1922  const uint8_t *data, int size,
1923  GetBitContext *gb, int nbits)
1924 {
1925  int rmn_bytes, rmn_bits;
1926 
1927  rmn_bits = rmn_bytes = get_bits_left(gb);
1928  if (rmn_bits < nbits)
1929  return;
1930  if (nbits > pb->size_in_bits - put_bits_count(pb))
1931  return;
1932  rmn_bits &= 7; rmn_bytes >>= 3;
1933  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1934  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1935  avpriv_copy_bits(pb, data + size - rmn_bytes,
1936  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1937 }
1938 
1951  int *got_frame_ptr, AVPacket *avpkt)
1952 {
1953  WMAVoiceContext *s = ctx->priv_data;
1954  GetBitContext *gb = &s->gb;
1955  int size, res, pos;
1956 
1957  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1958  * header at each ctx->block_align bytes. However, Libav's ASF demuxer
1959  * feeds us ASF packets, which may concatenate multiple "codec" packets
1960  * in a single "muxer" packet, so we artificially emulate that by
1961  * capping the packet size at ctx->block_align. */
1962  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1963  if (!size) {
1964  *got_frame_ptr = 0;
1965  return 0;
1966  }
1967  init_get_bits(&s->gb, avpkt->data, size << 3);
1968 
1969  /* size == ctx->block_align is used to indicate whether we are dealing with
1970  * a new packet or a packet of which we already read the packet header
1971  * previously. */
1972  if (size == ctx->block_align) { // new packet header
1973  if ((res = parse_packet_header(s)) < 0)
1974  return res;
1975 
1976  /* If the packet header specifies a s->spillover_nbits, then we want
1977  * to push out all data of the previous packet (+ spillover) before
1978  * continuing to parse new superframes in the current packet. */
1979  if (s->spillover_nbits > 0) {
1980  if (s->sframe_cache_size > 0) {
1981  int cnt = get_bits_count(gb);
1982  copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
1983  flush_put_bits(&s->pb);
1985  if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
1986  *got_frame_ptr) {
1987  cnt += s->spillover_nbits;
1988  s->skip_bits_next = cnt & 7;
1989  return cnt >> 3;
1990  } else
1991  skip_bits_long (gb, s->spillover_nbits - cnt +
1992  get_bits_count(gb)); // resync
1993  } else
1994  skip_bits_long(gb, s->spillover_nbits); // resync
1995  }
1996  } else if (s->skip_bits_next)
1997  skip_bits(gb, s->skip_bits_next);
1998 
1999  /* Try parsing superframes in current packet */
2000  s->sframe_cache_size = 0;
2001  s->skip_bits_next = 0;
2002  pos = get_bits_left(gb);
2003  if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
2004  return res;
2005  } else if (*got_frame_ptr) {
2006  int cnt = get_bits_count(gb);
2007  s->skip_bits_next = cnt & 7;
2008  return cnt >> 3;
2009  } else if ((s->sframe_cache_size = pos) > 0) {
2010  /* rewind bit reader to start of last (incomplete) superframe... */
2011  init_get_bits(gb, avpkt->data, size << 3);
2012  skip_bits_long(gb, (size << 3) - pos);
2013  assert(get_bits_left(gb) == pos);
2014 
2015  /* ...and cache it for spillover in next packet */
2017  copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
2018  // FIXME bad - just copy bytes as whole and add use the
2019  // skip_bits_next field
2020  }
2021 
2022  return size;
2023 }
2024 
2026 {
2027  WMAVoiceContext *s = ctx->priv_data;
2028 
2029  if (s->do_apf) {
2030  ff_rdft_end(&s->rdft);
2031  ff_rdft_end(&s->irdft);
2032  ff_dct_end(&s->dct);
2033  ff_dct_end(&s->dst);
2034  }
2035 
2036  return 0;
2037 }
2038 
2040 {
2041  WMAVoiceContext *s = ctx->priv_data;
2042  int n;
2043 
2044  s->postfilter_agc = 0;
2045  s->sframe_cache_size = 0;
2046  s->skip_bits_next = 0;
2047  for (n = 0; n < s->lsps; n++)
2048  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
2049  memset(s->excitation_history, 0,
2050  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
2051  memset(s->synth_history, 0,
2052  sizeof(*s->synth_history) * MAX_LSPS);
2053  memset(s->gain_pred_err, 0,
2054  sizeof(s->gain_pred_err));
2055 
2056  if (s->do_apf) {
2057  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
2058  sizeof(*s->synth_filter_out_buf) * s->lsps);
2059  memset(s->dcf_mem, 0,
2060  sizeof(*s->dcf_mem) * 2);
2061  memset(s->zero_exc_pf, 0,
2062  sizeof(*s->zero_exc_pf) * s->history_nsamples);
2063  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
2064  }
2065 }
2066 
2068  .name = "wmavoice",
2069  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
2070  .type = AVMEDIA_TYPE_AUDIO,
2071  .id = AV_CODEC_ID_WMAVOICE,
2072  .priv_data_size = sizeof(WMAVoiceContext),
2074  .init_static_data = wmavoice_init_static_data,
2077  .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
2078  .flush = wmavoice_flush,
2079 };
RDFTContext rdft
Definition: wmavoice.c:271
Description of frame types.
Definition: wmavoice.c:100
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1147
av_cold void ff_rdft_end(RDFTContext *s)
Definition: rdft.c:130
static const uint8_t wmavoice_dq_lsp16r2[0x500]
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:152
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:54
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will lie in the range [0...
Definition: wmavoice.c:1208
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:306
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:83
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:256
int size
This structure describes decoded (raw) audio or video data.
Definition: frame.h:135
void(* dct_calc)(struct DCTContext *s, FFTSample *data)
Definition: dct.h:37
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned...
Definition: wmavoice.c:248
int frame_lsp_bitsize
size (in bits) of LSPs, when encoded per-frame (independent coding)
Definition: wmavoice.c:165
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+FF_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:208
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:277
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:240
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:774
memory handling functions
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
static void skip_bits_long(GetBitContext *s, int n)
Definition: get_bits.h:199
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:289
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1005
int x[10]
Definition: acelp_vectors.h:31
int size
Definition: avcodec.h:974
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:243
void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:61
const uint8_t * buffer
Definition: get_bits.h:54
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1629
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:58
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_dlog(ac->avr,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
static const float wmavoice_gain_codebook_fcb[128]
static const uint8_t wmavoice_dq_lsp16i1[0x640]
static const uint8_t wmavoice_dq_lsp16r1[0x500]
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:195
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:174
static const uint8_t wmavoice_dq_lsp16i3[0x300]
float pitch_fac
Definition: acelp_vectors.h:35
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1453
static void calc_input_response(WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:575
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:860
AVCodec.
Definition: avcodec.h:2812
#define MUL16(a, b)
Definition: fft-test.c:50
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:50
int block_align
number of bytes per packet if constant and known or 0 Used by some WAV based audio codecs...
Definition: avcodec.h:1844
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1057
static const float wmavoice_ipol1_coeffs[17 *9]
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:145
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: float_dsp.c:104
static int decode(MimicContext *ctx, int quality, int num_coeffs, int is_iframe)
Definition: mimic.c:275
void void avpriv_request_sample(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value...
Definition: wmavoice.c:177
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
uint8_t bits
Definition: crc.c:251
enum AVSampleFormat sample_fmt
audio sample format
Definition: avcodec.h:1815
uint8_t
#define av_cold
Definition: attributes.h:66
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:29
static const uint8_t wmavoice_dq_lsp16r3[0x600]
float delta
#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)
Definition: get_bits.h:443
DCTContext dct
Definition: wmavoice.c:273
static const float wmavoice_gain_codebook_acb[128]
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:103
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:246
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:199
float tilted_lpcs_pf[0x80]
aligned buffer for LPC tilting
Definition: wmavoice.c:285
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1164
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:562
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
static const uint8_t wmavoice_dq_lsp10r[0x1400]
#define CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
Definition: avcodec.h:684
const char data[16]
Definition: mxf.c:70
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:828
static int check_bits_for_superframe(GetBitContext *orig_gb, WMAVoiceContext *s)
Test if there's enough bits to read 1 superframe.
Definition: wmavoice.c:1667
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
uint8_t * data
Definition: avcodec.h:973
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:194
static int flags
Definition: log.c:44
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:279
bitstream reader API header.
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:2039
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:261
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:227
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1921
#define expf(x)
Definition: libm.h:61
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:555
int size_in_bits
Definition: put_bits.h:39
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:123
static const double wmavoice_mean_lsf16[2][16]
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:211
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:176
static const float wmavoice_std_codebook[1000]
static const int sizes[][2]
Definition: img2dec.c:46
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:230
#define AVERROR(e)
Definition: error.h:43
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:145
static const float wmavoice_gain_silence[256]
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:284
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:148
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:169
Windows Media Voice (WMAVoice) tables.
Definition: avfft.h:73
const char * name
Name of the codec implementation.
Definition: avcodec.h:2819
int no_repeat_mask
Definition: acelp_vectors.h:33
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:156
static void put_bits(PutBitContext *s, int n, unsigned int value)
Write up to 31 bits into a bitstream.
Definition: put_bits.h:134
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:235
#define FFMAX(a, b)
Definition: common.h:55
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:183
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
DCTContext dst
contexts for phase shift (in Hilbert transform, part of postfilter)
Definition: wmavoice.c:273
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:163
Definition: get_bits.h:64
uint64_t channel_layout
Audio channel layout.
Definition: avcodec.h:1868
void(* rdft_calc)(struct RDFTContext *s, FFTSample *z)
Definition: rdft.h:60
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:67
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:204
#define powf(x, y)
Definition: libm.h:44
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:955
#define FF_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
Definition: avcodec.h:531
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:170
WMA Voice decoding context.
Definition: wmavoice.c:134
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it...
Definition: wmavoice.c:692
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:154
audio channel layout utility functions
Definition: avfft.h:97
#define FFMIN(a, b)
Definition: common.h:57
#define log_range(var, assign)
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:49
static VLC frame_type_vlc
Frame type VLC coding.
Definition: wmavoice.c:64
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:172
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:48
float denoise_coeffs_pf[0x80]
aligned buffer for denoise coefficients
Definition: wmavoice.c:287
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:886
int size_in_bits
Definition: get_bits.h:56
float y[10]
Definition: acelp_vectors.h:32
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:69
Definition: dct.h:31
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs...
Definition: wmavoice.c:92
float sin[511]
Definition: wmavoice.c:275
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE(*table)[2], int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:522
#define AV_RL32
Definition: intreadwrite.h:146
Definition: avfft.h:72
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:516
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
if(ac->has_optimized_func)
void ff_sine_window_init(float *window, int n)
Generate a sine window.
static const float wmavoice_gain_universal[64]
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:201
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:345
#define AVERROR_PATCHWELCOME
Not yet implemented in Libav, patches welcome.
Definition: error.h:57
int sframe_lsp_bitsize
size (in bits) of LSPs, when encoded per superframe (residual coding)
Definition: wmavoice.c:167
static const uint8_t last_coeff[3]
Definition: qdm2data.h:257
static const struct frame_type_desc frame_descs[17]
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:283
Libavcodec external API header.
int sample_rate
samples per second
Definition: avcodec.h:1807
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1950
main external API structure.
Definition: avcodec.h:1050
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder). ...
Definition: wmavoice.c:1886
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:490
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: utils.c:612
AVCodec ff_wmavoice_decoder
Definition: wmavoice.c:2067
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:143
int extradata_size
Definition: avcodec.h:1165
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:271
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1410
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2025
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:263
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
Set up DCT.
Definition: dct.c:177
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:231
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:375
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:55
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:162
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:254
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:196
static av_always_inline av_const long int lrint(double x)
Definition: libm.h:137
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:476
static const float mean_lsf[10]
Definition: siprdata.h:27
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that was split over two packets
Definition: wmavoice.c:57
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:105
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:922
RDFTContext irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
Definition: wmavoice.c:271
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:141
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1752
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_dlog(ac->avr,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> out
#define CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket. Normally demuxers return one frame at a time...
Definition: avcodec.h:736
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1244
uint8_t n_blocks
number of blocks per frame (each block contains 160/n_blocks samples)
Definition: wmavoice.c:101
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:83
static av_cold void wmavoice_init_static_data(AVCodec *codec)
Definition: wmavoice.c:320
static av_cold void flush(AVCodecContext *avctx)
Flush (reset) the frame ID after seeking.
Definition: alsdec.c:1797
int pitch_lag
Definition: acelp_vectors.h:34
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation ...
Definition: wmavoice.c:257
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:48
static av_cold int init(AVCodecParserContext *s)
Definition: h264_parser.c:499
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:229
void * priv_data
Definition: avcodec.h:1092
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:53
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:233
static const double wmavoice_mean_lsf10[2][10]
int len
#define av_log2_16bit
Definition: intmath.h:86
int channels
number of audio channels
Definition: avcodec.h:1808
VLC_TYPE(* table)[2]
code, bits
Definition: get_bits.h:66
av_cold void ff_dct_end(DCTContext *s)
Definition: dct.c:218
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:85
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:77
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:181
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:171
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:161
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:52
static const int8_t pulses[4]
Definition: g723_1_data.h:531
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
deliberately overlapping memcpy implementation
Definition: mem.c:319
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:216
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:104
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:158
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:59
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: avfft.h:96
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:275
#define AV_CH_LAYOUT_MONO
av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
Set up a real FFT.
Definition: rdft.c:99
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:237
float min
This structure stores compressed data.
Definition: avcodec.h:950
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:179
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e. excitation) by postfilter
Definition: wmavoice.c:280
for(j=16;j >0;--j)
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1275
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
Definition: wmavoice.c:106
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:54
uint16_t frame_size
the amount of bits that make up the block data (per frame)
Definition: wmavoice.c:109
Definition: vf_drawbox.c:37
#define MULH
Definition: mathops.h:42
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:139
bitstream writer API