aaccoder.c
Go to the documentation of this file.
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
27 /***********************************
28  * TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32 
33 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34 
35 #include <float.h>
36 #include "libavutil/mathematics.h"
37 #include "avcodec.h"
38 #include "put_bits.h"
39 #include "aac.h"
40 #include "aacenc.h"
41 #include "aactab.h"
42 
/**
 * Bits spent on the run-length field of a codebook section, indexed by the
 * run length, for frames using 5-bit run fields (escape value 31): every
 * additional escape adds another 5 bits.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * Same as above for frames using 3-bit run fields (escape value 7).
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};

/**
 * Selector for the two tables above. Callers index it with
 * (num_windows == 8), so entry 0 must be the long-window table and
 * entry 1 the short-window table.
 */
static const uint8_t *run_value_bits[2] = {
    run_value_bits_long, run_value_bits_short
};
59 
60 
66 static av_always_inline int quant(float coef, const float Q)
67 {
68  float a = coef * Q;
69  return sqrtf(a * sqrtf(a)) + 0.4054;
70 }
71 
/**
 * Quantize a run of coefficients whose |x|^(3/4) values are already known.
 *
 * @param out       receives one quantized integer per coefficient
 * @param in        original coefficients; only their sign is inspected
 * @param scaled    |in[i]|^(3/4) values
 * @param size      number of coefficients
 * @param Q34       quantizer multiplier applied to the scaled values
 * @param is_signed when non-zero, negative inputs yield negative outputs
 * @param maxval    upper bound for the quantized magnitude
 */
static void quantize_bands(int *out, const float *in, const float *scaled,
                           int size, float Q34, int is_signed, int maxval)
{
    const double ceiling = (double)maxval;
    int k;

    for (k = 0; k < size; k++) {
        /* product is formed in float, the rounding offset is added in double */
        const double rounded = (double)(scaled[k] * Q34) + 0.4054;
        int level = (int)FFMIN(rounded, ceiling);

        if (is_signed && in[k] < 0.0f)
            level = -level;
        out[k] = level;
    }
}
85 
86 static void abs_pow34_v(float *out, const float *in, const int size)
87 {
88 #ifndef USE_REALLY_FULL_SEARCH
89  int i;
90  for (i = 0; i < size; i++) {
91  float a = fabsf(in[i]);
92  out[i] = sqrtf(a * sqrtf(a));
93  }
94 #endif /* USE_REALLY_FULL_SEARCH */
95 }
96 
/* Per-codebook radix used to fold the quantized values of one vector into a
 * single codebook index (index = index * range + value + offset). */
static const uint8_t aac_cb_range[12] = {
    0,          /* codebook 0      */
    3, 3,       /* codebooks 1, 2  */
    3, 3,       /* codebooks 3, 4  */
    9, 9,       /* codebooks 5, 6  */
    8, 8,       /* codebooks 7, 8  */
    13, 13,     /* codebooks 9, 10 */
    17          /* codebook 11     */
};

/* Per-codebook limit for the quantized magnitude; also used as the index
 * offset for codebooks coded as signed values. */
static const uint8_t aac_cb_maxval[12] = {
    0,          /* codebook 0      */
    1, 1,       /* codebooks 1, 2  */
    2, 2,       /* codebooks 3, 4  */
    4, 4,       /* codebooks 5, 6  */
    7, 7,       /* codebooks 7, 8  */
    12, 12,     /* codebooks 9, 10 */
    16          /* codebook 11     */
};
99 
106  struct AACEncContext *s,
107  PutBitContext *pb, const float *in,
108  const float *scaled, int size, int scale_idx,
109  int cb, const float lambda, const float uplim,
110  int *bits, int BT_ZERO, int BT_UNSIGNED,
111  int BT_PAIR, int BT_ESC)
112 {
113  const float IQ = ff_aac_pow2sf_tab[POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
114  const float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
115  const float CLIPPED_ESCAPE = 165140.0f*IQ;
116  int i, j;
117  float cost = 0;
118  const int dim = BT_PAIR ? 2 : 4;
119  int resbits = 0;
120  const float Q34 = sqrtf(Q * sqrtf(Q));
121  const int range = aac_cb_range[cb];
122  const int maxval = aac_cb_maxval[cb];
123  int off;
124 
125  if (BT_ZERO) {
126  for (i = 0; i < size; i++)
127  cost += in[i]*in[i];
128  if (bits)
129  *bits = 0;
130  return cost * lambda;
131  }
132  if (!scaled) {
133  abs_pow34_v(s->scoefs, in, size);
134  scaled = s->scoefs;
135  }
136  quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, maxval);
137  if (BT_UNSIGNED) {
138  off = 0;
139  } else {
140  off = maxval;
141  }
142  for (i = 0; i < size; i += dim) {
143  const float *vec;
144  int *quants = s->qcoefs + i;
145  int curidx = 0;
146  int curbits;
147  float rd = 0.0f;
148  for (j = 0; j < dim; j++) {
149  curidx *= range;
150  curidx += quants[j] + off;
151  }
152  curbits = ff_aac_spectral_bits[cb-1][curidx];
153  vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
154  if (BT_UNSIGNED) {
155  for (j = 0; j < dim; j++) {
156  float t = fabsf(in[i+j]);
157  float di;
158  if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
159  if (t >= CLIPPED_ESCAPE) {
160  di = t - CLIPPED_ESCAPE;
161  curbits += 21;
162  } else {
163  int c = av_clip(quant(t, Q), 0, 8191);
164  di = t - c*cbrtf(c)*IQ;
165  curbits += av_log2(c)*2 - 4 + 1;
166  }
167  } else {
168  di = t - vec[j]*IQ;
169  }
170  if (vec[j] != 0.0f)
171  curbits++;
172  rd += di*di;
173  }
174  } else {
175  for (j = 0; j < dim; j++) {
176  float di = in[i+j] - vec[j]*IQ;
177  rd += di*di;
178  }
179  }
180  cost += rd * lambda + curbits;
181  resbits += curbits;
182  if (cost >= uplim)
183  return uplim;
184  if (pb) {
185  put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
186  if (BT_UNSIGNED)
187  for (j = 0; j < dim; j++)
188  if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
189  put_bits(pb, 1, in[i+j] < 0.0f);
190  if (BT_ESC) {
191  for (j = 0; j < 2; j++) {
192  if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
193  int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
194  int len = av_log2(coef);
195 
196  put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
197  put_bits(pb, len, coef & ((1 << len) - 1));
198  }
199  }
200  }
201  }
202  }
203 
204  if (bits)
205  *bits = resbits;
206  return cost;
207 }
208 
209 #define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC) \
210 static float quantize_and_encode_band_cost_ ## NAME( \
211  struct AACEncContext *s, \
212  PutBitContext *pb, const float *in, \
213  const float *scaled, int size, int scale_idx, \
214  int cb, const float lambda, const float uplim, \
215  int *bits) { \
216  return quantize_and_encode_band_cost_template( \
217  s, pb, in, scaled, size, scale_idx, \
218  BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
219  BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); \
220 }
221 
223 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0)
224 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0)
225 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0)
226 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0)
227 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1)
228 
229 static float (*const quantize_and_encode_band_cost_arr[])(
230  struct AACEncContext *s,
231  PutBitContext *pb, const float *in,
232  const float *scaled, int size, int scale_idx,
233  int cb, const float lambda, const float uplim,
234  int *bits) = {
235  quantize_and_encode_band_cost_ZERO,
236  quantize_and_encode_band_cost_SQUAD,
237  quantize_and_encode_band_cost_SQUAD,
238  quantize_and_encode_band_cost_UQUAD,
239  quantize_and_encode_band_cost_UQUAD,
240  quantize_and_encode_band_cost_SPAIR,
241  quantize_and_encode_band_cost_SPAIR,
242  quantize_and_encode_band_cost_UPAIR,
243  quantize_and_encode_band_cost_UPAIR,
244  quantize_and_encode_band_cost_UPAIR,
245  quantize_and_encode_band_cost_UPAIR,
246  quantize_and_encode_band_cost_ESC,
247 };
248 
/*
 * Call the cost/encode routine registered for codebook cb, forwarding all
 * arguments unchanged. Note that cb is expanded twice (table index and
 * forwarded argument), so it must be free of side effects.
 */
#define quantize_and_encode_band_cost(s, pb, in, scaled, size, scale_idx,     \
                                      cb, lambda, uplim, bits)                \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, \
                                          cb, lambda, uplim, bits)
255 
/**
 * Measure the cost of coding a band with the given codebook without writing
 * any bits: forwards to quantize_and_encode_band_cost() with a NULL bit
 * writer.
 *
 * @return the cost reported by the selected routine
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits)
{
    const float band_cost =
        quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
                                      cb, lambda, uplim, bits);

    return band_cost;
}
264 
266  const float *in, int size, int scale_idx,
267  int cb, const float lambda)
268 {
269  quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
270  INFINITY, NULL);
271 }
272 
/**
 * Find the largest value of a band across all windows of a group.
 *
 * Consecutive windows are 128 values apart in scaled. The search starts
 * from 0, so the result is never negative.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of values per window
 * @param scaled    first value of the band in the first window
 * @return the maximum found, or 0 when nothing was scanned
 */
static float find_max_val(int group_len, int swb_size, const float *scaled) {
    float peak = 0.0f;
    int win;

    for (win = 0; win < group_len; win++) {
        const int base = win * 128;
        int k;

        for (k = 0; k < swb_size; k++)
            peak = FFMAX(peak, scaled[base + k]);
    }
    return peak;
}
283 
284 static int find_min_book(float maxval, int sf) {
286  float Q34 = sqrtf(Q * sqrtf(Q));
287  int qmaxval, cb;
288  qmaxval = maxval * Q34 + 0.4054f;
289  if (qmaxval == 0) cb = 0;
290  else if (qmaxval == 1) cb = 1;
291  else if (qmaxval == 2) cb = 3;
292  else if (qmaxval <= 4) cb = 5;
293  else if (qmaxval <= 7) cb = 7;
294  else if (qmaxval <= 12) cb = 9;
295  else cb = 11;
296  return cb;
297 }
298 
/**
 * One node of the codebook-selection path: for a given band position and
 * codebook it records the best way found to get there.
 */
typedef struct BandCodingPath {
    int prev_idx; /* codebook of the preceding run, -1 when there is none */
    float cost;   /* accumulated cost of the path ending here */
    int run;      /* number of consecutive bands coded with this codebook */
} BandCodingPath;
307 
312  int win, int group_len, const float lambda)
313 {
314  BandCodingPath path[120][12];
315  int w, swb, cb, start, size;
316  int i, j;
317  const int max_sfb = sce->ics.max_sfb;
318  const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
319  const int run_esc = (1 << run_bits) - 1;
320  int idx, ppos, count;
321  int stackrun[120], stackcb[120], stack_len;
322  float next_minrd = INFINITY;
323  int next_mincb = 0;
324 
325  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
326  start = win*128;
327  for (cb = 0; cb < 12; cb++) {
328  path[0][cb].cost = 0.0f;
329  path[0][cb].prev_idx = -1;
330  path[0][cb].run = 0;
331  }
332  for (swb = 0; swb < max_sfb; swb++) {
333  size = sce->ics.swb_sizes[swb];
334  if (sce->zeroes[win*16 + swb]) {
335  for (cb = 0; cb < 12; cb++) {
336  path[swb+1][cb].prev_idx = cb;
337  path[swb+1][cb].cost = path[swb][cb].cost;
338  path[swb+1][cb].run = path[swb][cb].run + 1;
339  }
340  } else {
341  float minrd = next_minrd;
342  int mincb = next_mincb;
343  next_minrd = INFINITY;
344  next_mincb = 0;
345  for (cb = 0; cb < 12; cb++) {
346  float cost_stay_here, cost_get_here;
347  float rd = 0.0f;
348  for (w = 0; w < group_len; w++) {
349  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
350  rd += quantize_band_cost(s, sce->coeffs + start + w*128,
351  s->scoefs + start + w*128, size,
352  sce->sf_idx[(win+w)*16+swb], cb,
353  lambda / band->threshold, INFINITY, NULL);
354  }
355  cost_stay_here = path[swb][cb].cost + rd;
356  cost_get_here = minrd + rd + run_bits + 4;
357  if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
358  != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
359  cost_stay_here += run_bits;
360  if (cost_get_here < cost_stay_here) {
361  path[swb+1][cb].prev_idx = mincb;
362  path[swb+1][cb].cost = cost_get_here;
363  path[swb+1][cb].run = 1;
364  } else {
365  path[swb+1][cb].prev_idx = cb;
366  path[swb+1][cb].cost = cost_stay_here;
367  path[swb+1][cb].run = path[swb][cb].run + 1;
368  }
369  if (path[swb+1][cb].cost < next_minrd) {
370  next_minrd = path[swb+1][cb].cost;
371  next_mincb = cb;
372  }
373  }
374  }
375  start += sce->ics.swb_sizes[swb];
376  }
377 
378  //convert resulting path from backward-linked list
379  stack_len = 0;
380  idx = 0;
381  for (cb = 1; cb < 12; cb++)
382  if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
383  idx = cb;
384  ppos = max_sfb;
385  while (ppos > 0) {
386  cb = idx;
387  stackrun[stack_len] = path[ppos][cb].run;
388  stackcb [stack_len] = cb;
389  idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
390  ppos -= path[ppos][cb].run;
391  stack_len++;
392  }
393  //perform actual band info encoding
394  start = 0;
395  for (i = stack_len - 1; i >= 0; i--) {
396  put_bits(&s->pb, 4, stackcb[i]);
397  count = stackrun[i];
398  memset(sce->zeroes + win*16 + start, !stackcb[i], count);
399  //XXX: memset when band_type is also uint8_t
400  for (j = 0; j < count; j++) {
401  sce->band_type[win*16 + start] = stackcb[i];
402  start++;
403  }
404  while (count >= run_esc) {
405  put_bits(&s->pb, run_bits, run_esc);
406  count -= run_esc;
407  }
408  put_bits(&s->pb, run_bits, count);
409  }
410 }
411 
413  int win, int group_len, const float lambda)
414 {
415  BandCodingPath path[120][12];
416  int w, swb, cb, start, size;
417  int i, j;
418  const int max_sfb = sce->ics.max_sfb;
419  const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
420  const int run_esc = (1 << run_bits) - 1;
421  int idx, ppos, count;
422  int stackrun[120], stackcb[120], stack_len;
423  float next_minrd = INFINITY;
424  int next_mincb = 0;
425 
426  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
427  start = win*128;
428  for (cb = 0; cb < 12; cb++) {
429  path[0][cb].cost = run_bits+4;
430  path[0][cb].prev_idx = -1;
431  path[0][cb].run = 0;
432  }
433  for (swb = 0; swb < max_sfb; swb++) {
434  size = sce->ics.swb_sizes[swb];
435  if (sce->zeroes[win*16 + swb]) {
436  float cost_stay_here = path[swb][0].cost;
437  float cost_get_here = next_minrd + run_bits + 4;
438  if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
439  != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
440  cost_stay_here += run_bits;
441  if (cost_get_here < cost_stay_here) {
442  path[swb+1][0].prev_idx = next_mincb;
443  path[swb+1][0].cost = cost_get_here;
444  path[swb+1][0].run = 1;
445  } else {
446  path[swb+1][0].prev_idx = 0;
447  path[swb+1][0].cost = cost_stay_here;
448  path[swb+1][0].run = path[swb][0].run + 1;
449  }
450  next_minrd = path[swb+1][0].cost;
451  next_mincb = 0;
452  for (cb = 1; cb < 12; cb++) {
453  path[swb+1][cb].cost = 61450;
454  path[swb+1][cb].prev_idx = -1;
455  path[swb+1][cb].run = 0;
456  }
457  } else {
458  float minrd = next_minrd;
459  int mincb = next_mincb;
460  int startcb = sce->band_type[win*16+swb];
461  next_minrd = INFINITY;
462  next_mincb = 0;
463  for (cb = 0; cb < startcb; cb++) {
464  path[swb+1][cb].cost = 61450;
465  path[swb+1][cb].prev_idx = -1;
466  path[swb+1][cb].run = 0;
467  }
468  for (cb = startcb; cb < 12; cb++) {
469  float cost_stay_here, cost_get_here;
470  float rd = 0.0f;
471  for (w = 0; w < group_len; w++) {
472  rd += quantize_band_cost(s, sce->coeffs + start + w*128,
473  s->scoefs + start + w*128, size,
474  sce->sf_idx[(win+w)*16+swb], cb,
475  0, INFINITY, NULL);
476  }
477  cost_stay_here = path[swb][cb].cost + rd;
478  cost_get_here = minrd + rd + run_bits + 4;
479  if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
480  != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
481  cost_stay_here += run_bits;
482  if (cost_get_here < cost_stay_here) {
483  path[swb+1][cb].prev_idx = mincb;
484  path[swb+1][cb].cost = cost_get_here;
485  path[swb+1][cb].run = 1;
486  } else {
487  path[swb+1][cb].prev_idx = cb;
488  path[swb+1][cb].cost = cost_stay_here;
489  path[swb+1][cb].run = path[swb][cb].run + 1;
490  }
491  if (path[swb+1][cb].cost < next_minrd) {
492  next_minrd = path[swb+1][cb].cost;
493  next_mincb = cb;
494  }
495  }
496  }
497  start += sce->ics.swb_sizes[swb];
498  }
499 
500  //convert resulting path from backward-linked list
501  stack_len = 0;
502  idx = 0;
503  for (cb = 1; cb < 12; cb++)
504  if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
505  idx = cb;
506  ppos = max_sfb;
507  while (ppos > 0) {
508  assert(idx >= 0);
509  cb = idx;
510  stackrun[stack_len] = path[ppos][cb].run;
511  stackcb [stack_len] = cb;
512  idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
513  ppos -= path[ppos][cb].run;
514  stack_len++;
515  }
516  //perform actual band info encoding
517  start = 0;
518  for (i = stack_len - 1; i >= 0; i--) {
519  put_bits(&s->pb, 4, stackcb[i]);
520  count = stackrun[i];
521  memset(sce->zeroes + win*16 + start, !stackcb[i], count);
522  //XXX: memset when band_type is also uint8_t
523  for (j = 0; j < count; j++) {
524  sce->band_type[win*16 + start] = stackcb[i];
525  start++;
526  }
527  while (count >= run_esc) {
528  put_bits(&s->pb, run_bits, run_esc);
529  count -= run_esc;
530  }
531  put_bits(&s->pb, run_bits, count);
532  }
533 }
534 
536 static av_always_inline uint8_t coef2minsf(float coef) {
537  return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
538 }
539 
541 static av_always_inline uint8_t coef2maxsf(float coef) {
542  return av_clip_uint8(log2f(coef)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
543 }
544 
/* One trellis node: the best cost found for reaching it and the state it
 * was reached from in the previous stage. */
typedef struct TrellisPath TrellisPath;
struct TrellisPath {
    float cost; /* accumulated cost of the best path ending in this node */
    int prev;   /* state index in the previous stage, negative when unset */
};

/* Trellis dimensions: number of stages and number of states per stage. */
#define TRELLIS_STAGES 121
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)
552 
555  const float lambda)
556 {
557  int q, w, w2, g, start = 0;
558  int i, j;
559  int idx;
561  int bandaddr[TRELLIS_STAGES];
562  int minq;
563  float mincost;
564  float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
565  int q0, q1, qcnt = 0;
566 
567  for (i = 0; i < 1024; i++) {
568  float t = fabsf(sce->coeffs[i]);
569  if (t > 0.0f) {
570  q0f = FFMIN(q0f, t);
571  q1f = FFMAX(q1f, t);
572  qnrgf += t*t;
573  qcnt++;
574  }
575  }
576 
577  if (!qcnt) {
578  memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
579  memset(sce->zeroes, 1, sizeof(sce->zeroes));
580  return;
581  }
582 
583  //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
584  q0 = coef2minsf(q0f);
585  //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
586  q1 = coef2maxsf(q1f);
587  //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
588  if (q1 - q0 > 60) {
589  int q0low = q0;
590  int q1high = q1;
591  //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
592  int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
593  q1 = qnrg + 30;
594  q0 = qnrg - 30;
595  //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
596  if (q0 < q0low) {
597  q1 += q0low - q0;
598  q0 = q0low;
599  } else if (q1 > q1high) {
600  q0 -= q1 - q1high;
601  q1 = q1high;
602  }
603  }
604  //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
605 
606  for (i = 0; i < TRELLIS_STATES; i++) {
607  paths[0][i].cost = 0.0f;
608  paths[0][i].prev = -1;
609  }
610  for (j = 1; j < TRELLIS_STAGES; j++) {
611  for (i = 0; i < TRELLIS_STATES; i++) {
612  paths[j][i].cost = INFINITY;
613  paths[j][i].prev = -2;
614  }
615  }
616  idx = 1;
617  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
618  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
619  start = w*128;
620  for (g = 0; g < sce->ics.num_swb; g++) {
621  const float *coefs = sce->coeffs + start;
622  float qmin, qmax;
623  int nz = 0;
624 
625  bandaddr[idx] = w * 16 + g;
626  qmin = INT_MAX;
627  qmax = 0.0f;
628  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
629  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
630  if (band->energy <= band->threshold || band->threshold == 0.0f) {
631  sce->zeroes[(w+w2)*16+g] = 1;
632  continue;
633  }
634  sce->zeroes[(w+w2)*16+g] = 0;
635  nz = 1;
636  for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
637  float t = fabsf(coefs[w2*128+i]);
638  if (t > 0.0f)
639  qmin = FFMIN(qmin, t);
640  qmax = FFMAX(qmax, t);
641  }
642  }
643  if (nz) {
644  int minscale, maxscale;
645  float minrd = INFINITY;
646  float maxval;
647  //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
648  minscale = coef2minsf(qmin);
649  //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
650  maxscale = coef2maxsf(qmax);
651  minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
652  maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
653  maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
654  for (q = minscale; q < maxscale; q++) {
655  float dist = 0;
656  int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
657  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
658  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
659  dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
660  q + q0, cb, lambda / band->threshold, INFINITY, NULL);
661  }
662  minrd = FFMIN(minrd, dist);
663 
664  for (i = 0; i < q1 - q0; i++) {
665  float cost;
666  cost = paths[idx - 1][i].cost + dist
668  if (cost < paths[idx][q].cost) {
669  paths[idx][q].cost = cost;
670  paths[idx][q].prev = i;
671  }
672  }
673  }
674  } else {
675  for (q = 0; q < q1 - q0; q++) {
676  paths[idx][q].cost = paths[idx - 1][q].cost + 1;
677  paths[idx][q].prev = q;
678  }
679  }
680  sce->zeroes[w*16+g] = !nz;
681  start += sce->ics.swb_sizes[g];
682  idx++;
683  }
684  }
685  idx--;
686  mincost = paths[idx][0].cost;
687  minq = 0;
688  for (i = 1; i < TRELLIS_STATES; i++) {
689  if (paths[idx][i].cost < mincost) {
690  mincost = paths[idx][i].cost;
691  minq = i;
692  }
693  }
694  while (idx) {
695  sce->sf_idx[bandaddr[idx]] = minq + q0;
696  minq = paths[idx][minq].prev;
697  idx--;
698  }
699  //set the same quantizers inside window groups
700  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
701  for (g = 0; g < sce->ics.num_swb; g++)
702  for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
703  sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
704 }
705 
710  AACEncContext *s,
712  const float lambda)
713 {
714  int start = 0, i, w, w2, g;
715  int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
716  float dists[128], uplims[128];
717  float maxvals[128];
718  int fflag, minscaler;
719  int its = 0;
720  int allz = 0;
721  float minthr = INFINITY;
722 
723  //XXX: some heuristic to determine initial quantizers will reduce search time
724  memset(dists, 0, sizeof(dists));
725  //determine zero bands and upper limits
726  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
727  for (g = 0; g < sce->ics.num_swb; g++) {
728  int nz = 0;
729  float uplim = 0.0f;
730  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
731  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
732  uplim += band->threshold;
733  if (band->energy <= band->threshold || band->threshold == 0.0f) {
734  sce->zeroes[(w+w2)*16+g] = 1;
735  continue;
736  }
737  nz = 1;
738  }
739  uplims[w*16+g] = uplim *512;
740  sce->zeroes[w*16+g] = !nz;
741  if (nz)
742  minthr = FFMIN(minthr, uplim);
743  allz |= nz;
744  }
745  }
746  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
747  for (g = 0; g < sce->ics.num_swb; g++) {
748  if (sce->zeroes[w*16+g]) {
749  sce->sf_idx[w*16+g] = SCALE_ONE_POS;
750  continue;
751  }
752  sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
753  }
754  }
755 
756  if (!allz)
757  return;
758  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
759 
760  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
761  start = w*128;
762  for (g = 0; g < sce->ics.num_swb; g++) {
763  const float *scaled = s->scoefs + start;
764  maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
765  start += sce->ics.swb_sizes[g];
766  }
767  }
768 
769  //perform two-loop search
770  //outer loop - improve quality
771  do {
772  int tbits, qstep;
773  minscaler = sce->sf_idx[0];
774  //inner loop - quantize spectrum to fit into given number of bits
775  qstep = its ? 1 : 32;
776  do {
777  int prev = -1;
778  tbits = 0;
779  fflag = 0;
780  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
781  start = w*128;
782  for (g = 0; g < sce->ics.num_swb; g++) {
783  const float *coefs = sce->coeffs + start;
784  const float *scaled = s->scoefs + start;
785  int bits = 0;
786  int cb;
787  float dist = 0.0f;
788 
789  if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
790  start += sce->ics.swb_sizes[g];
791  continue;
792  }
793  minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
794  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
795  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
796  int b;
797  dist += quantize_band_cost(s, coefs + w2*128,
798  scaled + w2*128,
799  sce->ics.swb_sizes[g],
800  sce->sf_idx[w*16+g],
801  cb,
802  1.0f,
803  INFINITY,
804  &b);
805  bits += b;
806  }
807  dists[w*16+g] = dist - bits;
808  if (prev != -1) {
809  bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
810  }
811  tbits += bits;
812  start += sce->ics.swb_sizes[g];
813  prev = sce->sf_idx[w*16+g];
814  }
815  }
816  if (tbits > destbits) {
817  for (i = 0; i < 128; i++)
818  if (sce->sf_idx[i] < 218 - qstep)
819  sce->sf_idx[i] += qstep;
820  } else {
821  for (i = 0; i < 128; i++)
822  if (sce->sf_idx[i] > 60 - qstep)
823  sce->sf_idx[i] -= qstep;
824  }
825  qstep >>= 1;
826  if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
827  qstep = 1;
828  } while (qstep);
829 
830  fflag = 0;
831  minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
832  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
833  for (g = 0; g < sce->ics.num_swb; g++) {
834  int prevsc = sce->sf_idx[w*16+g];
835  if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
836  if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
837  sce->sf_idx[w*16+g]--;
838  else //Try to make sure there is some energy in every band
839  sce->sf_idx[w*16+g]-=2;
840  }
841  sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
842  sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
843  if (sce->sf_idx[w*16+g] != prevsc)
844  fflag = 1;
845  sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
846  }
847  }
848  its++;
849  } while (fflag && its < 10);
850 }
851 
854  const float lambda)
855 {
856  int start = 0, i, w, w2, g;
857  float uplim[128], maxq[128];
858  int minq, maxsf;
859  float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
860  int last = 0, lastband = 0, curband = 0;
861  float avg_energy = 0.0;
862  if (sce->ics.num_windows == 1) {
863  start = 0;
864  for (i = 0; i < 1024; i++) {
865  if (i - start >= sce->ics.swb_sizes[curband]) {
866  start += sce->ics.swb_sizes[curband];
867  curband++;
868  }
869  if (sce->coeffs[i]) {
870  avg_energy += sce->coeffs[i] * sce->coeffs[i];
871  last = i;
872  lastband = curband;
873  }
874  }
875  } else {
876  for (w = 0; w < 8; w++) {
877  const float *coeffs = sce->coeffs + w*128;
878  start = 0;
879  for (i = 0; i < 128; i++) {
880  if (i - start >= sce->ics.swb_sizes[curband]) {
881  start += sce->ics.swb_sizes[curband];
882  curband++;
883  }
884  if (coeffs[i]) {
885  avg_energy += coeffs[i] * coeffs[i];
886  last = FFMAX(last, i);
887  lastband = FFMAX(lastband, curband);
888  }
889  }
890  }
891  }
892  last++;
893  avg_energy /= last;
894  if (avg_energy == 0.0f) {
895  for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
896  sce->sf_idx[i] = SCALE_ONE_POS;
897  return;
898  }
899  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
900  start = w*128;
901  for (g = 0; g < sce->ics.num_swb; g++) {
902  float *coefs = sce->coeffs + start;
903  const int size = sce->ics.swb_sizes[g];
904  int start2 = start, end2 = start + size, peakpos = start;
905  float maxval = -1, thr = 0.0f, t;
906  maxq[w*16+g] = 0.0f;
907  if (g > lastband) {
908  maxq[w*16+g] = 0.0f;
909  start += size;
910  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
911  memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
912  continue;
913  }
914  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
915  for (i = 0; i < size; i++) {
916  float t = coefs[w2*128+i]*coefs[w2*128+i];
917  maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
918  thr += t;
919  if (sce->ics.num_windows == 1 && maxval < t) {
920  maxval = t;
921  peakpos = start+i;
922  }
923  }
924  }
925  if (sce->ics.num_windows == 1) {
926  start2 = FFMAX(peakpos - 2, start2);
927  end2 = FFMIN(peakpos + 3, end2);
928  } else {
929  start2 -= start;
930  end2 -= start;
931  }
932  start += size;
933  thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
934  t = 1.0 - (1.0 * start2 / last);
935  uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
936  }
937  }
938  memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
939  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
940  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
941  start = w*128;
942  for (g = 0; g < sce->ics.num_swb; g++) {
943  const float *coefs = sce->coeffs + start;
944  const float *scaled = s->scoefs + start;
945  const int size = sce->ics.swb_sizes[g];
946  int scf, prev_scf, step;
947  int min_scf = -1, max_scf = 256;
948  float curdiff;
949  if (maxq[w*16+g] < 21.544) {
950  sce->zeroes[w*16+g] = 1;
951  start += size;
952  continue;
953  }
954  sce->zeroes[w*16+g] = 0;
955  scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
956  step = 16;
957  for (;;) {
958  float dist = 0.0f;
959  int quant_max;
960 
961  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
962  int b;
963  dist += quantize_band_cost(s, coefs + w2*128,
964  scaled + w2*128,
965  sce->ics.swb_sizes[g],
966  scf,
967  ESC_BT,
968  lambda,
969  INFINITY,
970  &b);
971  dist -= b;
972  }
973  dist *= 1.0f / 512.0f / lambda;
974  quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
975  if (quant_max >= 8191) { // too much, return to the previous quantizer
976  sce->sf_idx[w*16+g] = prev_scf;
977  break;
978  }
979  prev_scf = scf;
980  curdiff = fabsf(dist - uplim[w*16+g]);
981  if (curdiff <= 1.0f)
982  step = 0;
983  else
984  step = log2f(curdiff);
985  if (dist > uplim[w*16+g])
986  step = -step;
987  scf += step;
988  scf = av_clip_uint8(scf);
989  step = scf - prev_scf;
990  if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
991  sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
992  break;
993  }
994  if (step > 0)
995  min_scf = prev_scf;
996  else
997  max_scf = prev_scf;
998  }
999  start += size;
1000  }
1001  }
1002  minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
1003  for (i = 1; i < 128; i++) {
1004  if (!sce->sf_idx[i])
1005  sce->sf_idx[i] = sce->sf_idx[i-1];
1006  else
1007  minq = FFMIN(minq, sce->sf_idx[i]);
1008  }
1009  if (minq == INT_MAX)
1010  minq = 0;
1011  minq = FFMIN(minq, SCALE_MAX_POS);
1012  maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
1013  for (i = 126; i >= 0; i--) {
1014  if (!sce->sf_idx[i])
1015  sce->sf_idx[i] = sce->sf_idx[i+1];
1016  sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
1017  }
1018 }
1019 
1021  SingleChannelElement *sce,
1022  const float lambda)
1023 {
1024  int i, w, w2, g;
1025  int minq = 255;
1026 
1027  memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
1028  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
1029  for (g = 0; g < sce->ics.num_swb; g++) {
1030  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
1031  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
1032  if (band->energy <= band->threshold) {
1033  sce->sf_idx[(w+w2)*16+g] = 218;
1034  sce->zeroes[(w+w2)*16+g] = 1;
1035  } else {
1036  sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
1037  sce->zeroes[(w+w2)*16+g] = 0;
1038  }
1039  minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
1040  }
1041  }
1042  }
1043  for (i = 0; i < 128; i++) {
1044  sce->sf_idx[i] = 140;
1045  //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
1046  }
1047  //set the same quantizers inside window groups
1048  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
1049  for (g = 0; g < sce->ics.num_swb; g++)
1050  for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
1051  sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
1052 }
1053 
1055  const float lambda)
1056 {
1057  int start = 0, i, w, w2, g;
1058  float M[128], S[128];
1059  float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
1060  SingleChannelElement *sce0 = &cpe->ch[0];
1061  SingleChannelElement *sce1 = &cpe->ch[1];
1062  if (!cpe->common_window)
1063  return;
1064  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
1065  for (g = 0; g < sce0->ics.num_swb; g++) {
1066  if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
1067  float dist1 = 0.0f, dist2 = 0.0f;
1068  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
1069  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
1070  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
1071  float minthr = FFMIN(band0->threshold, band1->threshold);
1072  float maxthr = FFMAX(band0->threshold, band1->threshold);
1073  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
1074  M[i] = (sce0->coeffs[start+w2*128+i]
1075  + sce1->coeffs[start+w2*128+i]) * 0.5;
1076  S[i] = M[i]
1077  - sce1->coeffs[start+w2*128+i];
1078  }
1079  abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
1080  abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
1081  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
1082  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
1083  dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
1084  L34,
1085  sce0->ics.swb_sizes[g],
1086  sce0->sf_idx[(w+w2)*16+g],
1087  sce0->band_type[(w+w2)*16+g],
1088  lambda / band0->threshold, INFINITY, NULL);
1089  dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
1090  R34,
1091  sce1->ics.swb_sizes[g],
1092  sce1->sf_idx[(w+w2)*16+g],
1093  sce1->band_type[(w+w2)*16+g],
1094  lambda / band1->threshold, INFINITY, NULL);
1095  dist2 += quantize_band_cost(s, M,
1096  M34,
1097  sce0->ics.swb_sizes[g],
1098  sce0->sf_idx[(w+w2)*16+g],
1099  sce0->band_type[(w+w2)*16+g],
1100  lambda / maxthr, INFINITY, NULL);
1101  dist2 += quantize_band_cost(s, S,
1102  S34,
1103  sce1->ics.swb_sizes[g],
1104  sce1->sf_idx[(w+w2)*16+g],
1105  sce1->band_type[(w+w2)*16+g],
1106  lambda / minthr, INFINITY, NULL);
1107  }
1108  cpe->ms_mask[w*16+g] = dist2 < dist1;
1109  }
1110  start += sce0->ics.swb_sizes[g];
1111  }
1112  }
1113 }
1114 
1116  {
1120  search_for_ms,
1121  },
1122  {
1126  search_for_ms,
1127  },
1128  {
1132  search_for_ms,
1133  },
1134  {
1138  search_for_ms,
1139  },
1140 };