nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of Libav.
8  *
9  * Libav is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * Libav is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with Libav; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
38 #include "libavutil/mathematics.h"
39 #include "nellymoser.h"
40 #include "avcodec.h"
41 #include "dsputil.h"
42 #include "fft.h"
43 #include "sinewin.h"
44 
45 #define BITSTREAM_WRITER_LE
46 #include "put_bits.h"
47 
48 #define POW_TABLE_SIZE (1<<11)
49 #define POW_TABLE_OFFSET 3
50 #define OPT_SIZE ((1<<15) + 3000)
51 
52 typedef struct NellyMoserEncodeContext {
55  int bufsel;
61  DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];
62  float (*opt )[NELLY_BANDS];
63  uint8_t (*path)[NELLY_BANDS];
65 
66 static float pow_table[POW_TABLE_SIZE];
67 
68 static const uint8_t sf_lut[96] = {
69  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
70  5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
71  15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
72  27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
73  41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
74  54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
75 };
76 
77 static const uint8_t sf_delta_lut[78] = {
78  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
79  4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
80  13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
81  23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
82  28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
83 };
84 
85 static const uint8_t quant_lut[230] = {
86  0,
87 
88  0, 1, 2,
89 
90  0, 1, 2, 3, 4, 5, 6,
91 
92  0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
93  12, 13, 13, 13, 14,
94 
95  0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
96  8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
97  22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
98  30,
99 
100  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
101  4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
102  10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
103  15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
104  21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
105  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
106  46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
107  53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
108  58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
109  61, 61, 61, 61, 62,
110 };
111 
112 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
113 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
114 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
115 
117 {
118  s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
119  s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
120  NELLY_BUF_LEN);
121  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
122 
124  ff_sine_128, NELLY_BUF_LEN);
125  s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
126  NELLY_BUF_LEN);
128 }
129 
131 {
133  int i;
134 
135  if (avctx->channels != 1) {
136  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
137  return -1;
138  }
139 
140  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
141  avctx->sample_rate != 11025 &&
142  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
144  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
145  return -1;
146  }
147 
148  avctx->frame_size = NELLY_SAMPLES;
149  s->avctx = avctx;
150  ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
151  dsputil_init(&s->dsp, avctx);
152 
153  /* Generate overlap window */
154  ff_sine_window_init(ff_sine_128, 128);
155  for (i = 0; i < POW_TABLE_SIZE; i++)
156  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
157 
158  if (s->avctx->trellis) {
159  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
160  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
161  }
162 
163  return 0;
164 }
165 
167 {
169 
170  ff_mdct_end(&s->mdct_ctx);
171 
172  if (s->avctx->trellis) {
173  av_free(s->opt);
174  av_free(s->path);
175  }
176 
177  return 0;
178 }
179 
180 #define find_best(val, table, LUT, LUT_add, LUT_size) \
181  best_idx = \
182  LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
183  if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
184  best_idx++;
185 
186 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
187 {
188  int band, best_idx, power_idx = 0;
189  float power_candidate;
190 
191  //base exponent
192  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
193  idx_table[0] = best_idx;
194  power_idx = ff_nelly_init_table[best_idx];
195 
196  for (band = 1; band < NELLY_BANDS; band++) {
197  power_candidate = cand[band] - power_idx;
198  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
199  idx_table[band] = best_idx;
200  power_idx += ff_nelly_delta_table[best_idx];
201  }
202 }
203 
204 static inline float distance(float x, float y, int band)
205 {
206  //return pow(fabs(x-y), 2.0);
207  float tmp = x - y;
208  return tmp * tmp;
209 }
210 
211 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
212 {
213  int i, j, band, best_idx;
214  float power_candidate, best_val;
215 
216  float (*opt )[NELLY_BANDS] = s->opt ;
217  uint8_t(*path)[NELLY_BANDS] = s->path;
218 
219  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
220  opt[0][i] = INFINITY;
221  }
222 
223  for (i = 0; i < 64; i++) {
224  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
225  path[0][ff_nelly_init_table[i]] = i;
226  }
227 
228  for (band = 1; band < NELLY_BANDS; band++) {
229  int q, c = 0;
230  float tmp;
231  int idx_min, idx_max, idx;
232  power_candidate = cand[band];
233  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
234  idx_min = FFMAX(0, cand[band] - q);
235  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
236  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
237  if ( isinf(opt[band - 1][i]) )
238  continue;
239  for (j = 0; j < 32; j++) {
240  idx = i + ff_nelly_delta_table[j];
241  if (idx > idx_max)
242  break;
243  if (idx >= idx_min) {
244  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
245  if (opt[band][idx] > tmp) {
246  opt[band][idx] = tmp;
247  path[band][idx] = j;
248  c = 1;
249  }
250  }
251  }
252  }
253  }
254  assert(c); //FIXME
255  }
256 
257  best_val = INFINITY;
258  best_idx = -1;
259  band = NELLY_BANDS - 1;
260  for (i = 0; i < OPT_SIZE; i++) {
261  if (best_val > opt[band][i]) {
262  best_val = opt[band][i];
263  best_idx = i;
264  }
265  }
266  for (band = NELLY_BANDS - 1; band >= 0; band--) {
267  idx_table[band] = path[band][best_idx];
268  if (band) {
269  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
270  }
271  }
272 }
273 
280 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
281 {
282  PutBitContext pb;
283  int i, j, band, block, best_idx, power_idx = 0;
284  float power_val, coeff, coeff_sum;
285  float pows[NELLY_FILL_LEN];
286  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
287  float cand[NELLY_BANDS];
288 
289  apply_mdct(s);
290 
291  init_put_bits(&pb, output, output_size * 8);
292 
293  i = 0;
294  for (band = 0; band < NELLY_BANDS; band++) {
295  coeff_sum = 0;
296  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
297  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
298  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
299  }
300  cand[band] =
301  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
302  }
303 
304  if (s->avctx->trellis) {
305  get_exponent_dynamic(s, cand, idx_table);
306  } else {
307  get_exponent_greedy(s, cand, idx_table);
308  }
309 
310  i = 0;
311  for (band = 0; band < NELLY_BANDS; band++) {
312  if (band) {
313  power_idx += ff_nelly_delta_table[idx_table[band]];
314  put_bits(&pb, 5, idx_table[band]);
315  } else {
316  power_idx = ff_nelly_init_table[idx_table[0]];
317  put_bits(&pb, 6, idx_table[0]);
318  }
319  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
320  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321  s->mdct_out[i] *= power_val;
322  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
323  pows[i] = power_idx;
324  }
325  }
326 
327  ff_nelly_get_sample_bits(pows, bits);
328 
329  for (block = 0; block < 2; block++) {
330  for (i = 0; i < NELLY_FILL_LEN; i++) {
331  if (bits[i] > 0) {
332  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
333  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
334  best_idx =
335  quant_lut[av_clip (
336  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
337  quant_lut_offset[bits[i]],
338  quant_lut_offset[bits[i]+1] - 1
339  )];
340  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
341  best_idx++;
342 
343  put_bits(&pb, bits[i], best_idx);
344  }
345  }
346  if (!block)
348  }
349 
350  flush_put_bits(&pb);
351 }
352 
353 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
354 {
356  const float *samples = data;
357  int i;
358 
359  if (s->last_frame)
360  return 0;
361 
362  if (data) {
363  memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
364  for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
365  s->buf[s->bufsel][i] = 0;
366  }
367  s->bufsel = 1 - s->bufsel;
368  if (!s->have_saved) {
369  s->have_saved = 1;
370  return 0;
371  }
372  } else {
373  memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
374  s->bufsel = 1 - s->bufsel;
375  s->last_frame = 1;
376  }
377 
378  if (s->have_saved) {
379  encode_block(s, frame, buf_size);
380  return NELLY_BLOCK_LEN;
381  }
382  return 0;
383 }
384 
386  .name = "nellymoser",
387  .type = AVMEDIA_TYPE_AUDIO,
388  .id = CODEC_ID_NELLYMOSER,
389  .priv_data_size = sizeof(NellyMoserEncodeContext),
390  .init = encode_init,
391  .encode = encode_frame,
392  .close = encode_end,
394  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395  .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
396 };