af_amix.c
Go to the documentation of this file.
1 /*
2  * Audio Mix Filter
3  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
31 #include "libavutil/audio_fifo.h"
32 #include "libavutil/avassert.h"
33 #include "libavutil/avstring.h"
35 #include "libavutil/common.h"
36 #include "libavutil/float_dsp.h"
37 #include "libavutil/mathematics.h"
38 #include "libavutil/opt.h"
39 #include "libavutil/samplefmt.h"
40 
41 #include "audio.h"
42 #include "avfilter.h"
43 #include "formats.h"
44 #include "internal.h"
45 
/*
 * Per-input states stored in MixContext.input_state[].
 * INPUT_OFF is assigned once an input has returned EOF (for inputs other
 * than the first, only after its FIFO is empty); INPUT_ON is the initial
 * state of every input and the only state that output_frame() mixes.
 * NOTE(review): INPUT_INACTIVE is not referenced anywhere in the visible code.
 */
46 #define INPUT_OFF 0
47 #define INPUT_ON 1
48 #define INPUT_INACTIVE 2
/*
 * Values of the "duration" option (MixContext.duration_mode); they select
 * when calc_active_inputs() reports end-of-stream.
 */
50 #define DURATION_LONGEST 0
51 #define DURATION_SHORTEST 1
52 #define DURATION_FIRST 2
53 
54 
/*
 * One node of the singly linked frame queue: timestamp of a queued frame
 * plus the link to the following node.
 * NOTE(review): the frame_list_* helpers also read and write
 * info->nb_samples, but that member's declaration is not visible in this
 * listing (the embedded numbering skips line 56).
 */
55 typedef struct FrameInfo {
57  int64_t pts;
58  struct FrameInfo *next;
59 } FrameInfo;
60 
/*
 * Queue of FrameInfo nodes together with running totals.
 * NOTE(review): only nb_frames is visible here; the helpers in this file
 * also use ->nb_samples, ->list (head) and ->end (tail), whose declarations
 * are missing from this listing (embedded numbering skips lines 71-73).
 */
69 typedef struct FrameList {
/* number of FrameInfo nodes currently queued */
70  int nb_frames;
74 } FrameList;
75 
76 static void frame_list_clear(FrameList *frame_list)
77 {
78  if (frame_list) {
79  while (frame_list->list) {
80  FrameInfo *info = frame_list->list;
81  frame_list->list = info->next;
82  av_free(info);
83  }
84  frame_list->nb_frames = 0;
85  frame_list->nb_samples = 0;
86  frame_list->end = NULL;
87  }
88 }
89 
90 static int frame_list_next_frame_size(FrameList *frame_list)
91 {
92  if (!frame_list->list)
93  return 0;
94  return frame_list->list->nb_samples;
95 }
96 
97 static int64_t frame_list_next_pts(FrameList *frame_list)
98 {
99  if (!frame_list->list)
100  return AV_NOPTS_VALUE;
101  return frame_list->list->pts;
102 }
103 
104 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
105 {
106  if (nb_samples >= frame_list->nb_samples) {
107  frame_list_clear(frame_list);
108  } else {
109  int samples = nb_samples;
110  while (samples > 0) {
111  FrameInfo *info = frame_list->list;
112  av_assert0(info != NULL);
113  if (info->nb_samples <= samples) {
114  samples -= info->nb_samples;
115  frame_list->list = info->next;
116  if (!frame_list->list)
117  frame_list->end = NULL;
118  frame_list->nb_frames--;
119  frame_list->nb_samples -= info->nb_samples;
120  av_free(info);
121  } else {
122  info->nb_samples -= samples;
123  info->pts += samples;
124  frame_list->nb_samples -= samples;
125  samples = 0;
126  }
127  }
128  }
129 }
130 
131 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
132 {
133  FrameInfo *info = av_malloc(sizeof(*info));
134  if (!info)
135  return AVERROR(ENOMEM);
136  info->nb_samples = nb_samples;
137  info->pts = pts;
138  info->next = NULL;
139 
140  if (!frame_list->list) {
141  frame_list->list = info;
142  frame_list->end = info;
143  } else {
144  av_assert0(frame_list->end != NULL);
145  frame_list->end->next = info;
146  frame_list->end = info;
147  }
148  frame_list->nb_frames++;
149  frame_list->nb_samples += nb_samples;
150 
151  return 0;
152 }
153 
154 
/*
 * Private context of the amix filter.
 * NOTE(review): this listing shows only part of the structure. The code in
 * this file also accesses fdsp, active_inputs, duration_mode,
 * dropout_transition, nb_channels, sample_rate, fifos, input_state and
 * frame_list, whose declarations are not visible here (gaps in the embedded
 * line numbering).
 */
155 typedef struct MixContext {
/* set to &amix_class in init() */
156  const AVClass *class;
158 
/* value of the "inputs" option; number of input pads created in init() */
159  int nb_inputs;
/* result of av_sample_fmt_is_planar() for the output format */
166  int planar;
/* per-input gain applied while mixing; recomputed by calculate_scales() */
169  float *input_scale;
/* current normalisation divisor; input_scale[i] is 1 / scale_norm for active inputs */
170  float scale_norm;
/* pts assigned to the next output frame; advanced by nb_samples per frame */
171  int64_t next_pts;
173 } MixContext;
174 
/* OFFSET(x): byte offset of member x inside MixContext, for the option table. */
175 #define OFFSET(x) offsetof(MixContext, x)
/* A: shorthand for the flag put on every option entry below. */
176 #define A AV_OPT_FLAG_AUDIO_PARAM
/*
 * User-settable options of the filter:
 *  - "inputs":   integer, default 2, range 1..32
 *  - "duration": integer, default DURATION_LONGEST, with the named constants
 *                "longest", "shortest" and "first" in the "duration" unit
 *  - "dropout_transition": float, default 2.0, range 0..INT_MAX
 * The table ends with a { NULL } entry.
 */
177 static const AVOption options[] = {
178  { "inputs", "Number of inputs.",
179  OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A },
180  { "duration", "How to determine the end-of-stream.",
181  OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A, "duration" },
182  { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A, "duration" },
183  { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A, "duration" },
184  { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A, "duration" },
/* A value of 0 is allowed here; it becomes a zero divisor in calculate_scales(). */
185  { "dropout_transition", "Transition time, in seconds, for volume "
186  "renormalization when an input stream ends.",
187  OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A },
188  { NULL },
189 };
190 
191 static const AVClass amix_class = {
192  .class_name = "amix filter",
193  .item_name = av_default_item_name,
194  .option = options,
195  .version = LIBAVUTIL_VERSION_INT,
196 };
197 
198 
/*
 * Body of the routine that recomputes the per-input mixing gains.
 * NOTE(review): the signature line is missing from this listing; call sites
 * in this file invoke it as calculate_scales(s, nb_samples) with s being the
 * MixContext.
 */
207 {
208  int i;
209 
/* While the divisor is still above the number of active inputs, lower it in
 * proportion to the samples about to be output, spread over
 * dropout_transition seconds.
 * NOTE(review): embedded line 212 is missing from this listing, so any
 * lower bound applied after the subtraction is not visible here. */
210  if (s->scale_norm > s->active_inputs) {
211  s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
213  }
214 
/* Inputs in state INPUT_ON get gain 1 / scale_norm; all others are muted. */
215  for (i = 0; i < s->nb_inputs; i++) {
216  if (s->input_state[i] == INPUT_ON)
217  s->input_scale[i] = 1.0f / s->scale_norm;
218  else
219  s->input_scale[i] = 0.0f;
220  }
221 }
222 
/**
 * config_props callback of the output pad: records the output format
 * parameters and allocates the per-input state.
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
 *
 * NOTE(review): embedded lines 233, 243 and 250 are missing from this
 * listing; the assignments of s->nb_channels and s->input_state are
 * therefore not visible here.
 */
223 static int config_output(AVFilterLink *outlink)
224 {
225  AVFilterContext *ctx = outlink->src;
226  MixContext *s = ctx->priv;
227  int i;
228  char buf[64];
229 
230  s->planar = av_sample_fmt_is_planar(outlink->format);
231  s->sample_rate = outlink->sample_rate;
/* Output timestamps are counted in samples (time base 1 / sample_rate). */
232  outlink->time_base = (AVRational){ 1, outlink->sample_rate };
234 
/* Early returns below leave earlier allocations in place; uninit() frees
 * these members. NOTE(review): confirm the framework calls uninit() after a
 * failed configuration. */
235  s->frame_list = av_mallocz(sizeof(*s->frame_list));
236  if (!s->frame_list)
237  return AVERROR(ENOMEM);
238 
239  s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
240  if (!s->fifos)
241  return AVERROR(ENOMEM);
242 
/* One audio FIFO per input, created with an initial size argument of 1024. */
244  for (i = 0; i < s->nb_inputs; i++) {
245  s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
246  if (!s->fifos[i])
247  return AVERROR(ENOMEM);
248  }
249 
251  if (!s->input_state)
252  return AVERROR(ENOMEM);
/* Byte-wise fill: marks nb_inputs bytes as INPUT_ON.
 * NOTE(review): this assumes input_state has one-byte elements — confirm
 * against the declaration, which is not visible here. */
253  memset(s->input_state, INPUT_ON, s->nb_inputs);
254  s->active_inputs = s->nb_inputs;
255 
256  s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
257  if (!s->input_scale)
258  return AVERROR(ENOMEM);
/* Start with the divisor equal to the input count and derive initial gains. */
259  s->scale_norm = s->active_inputs;
260  calculate_scales(s, 0);
261 
262  av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
263 
264  av_log(ctx, AV_LOG_VERBOSE,
265  "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
266  av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
267 
268  return 0;
269 }
270 
274 static int output_frame(AVFilterLink *outlink, int nb_samples)
275 {
276  AVFilterContext *ctx = outlink->src;
277  MixContext *s = ctx->priv;
278  AVFilterBufferRef *out_buf, *in_buf;
279  int i;
280 
281  calculate_scales(s, nb_samples);
282 
283  out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
284  if (!out_buf)
285  return AVERROR(ENOMEM);
286 
287  in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
288  if (!in_buf) {
289  avfilter_unref_buffer(out_buf);
290  return AVERROR(ENOMEM);
291  }
292 
293  for (i = 0; i < s->nb_inputs; i++) {
294  if (s->input_state[i] == INPUT_ON) {
295  int planes, plane_size, p;
296 
297  av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
298  nb_samples);
299 
300  planes = s->planar ? s->nb_channels : 1;
301  plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
302  plane_size = FFALIGN(plane_size, 16);
303 
304  for (p = 0; p < planes; p++) {
305  s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
306  (float *) in_buf->extended_data[p],
307  s->input_scale[i], plane_size);
308  }
309  }
310  }
311  avfilter_unref_buffer(in_buf);
312 
313  out_buf->pts = s->next_pts;
314  if (s->next_pts != AV_NOPTS_VALUE)
315  s->next_pts += nb_samples;
316 
317  return ff_filter_frame(outlink, out_buf);
318 }
319 
/*
 * Body of the routine that returns the smallest FIFO fill level among the
 * inputs other than the first, skipping inputs in state INPUT_OFF; returns 0
 * when no such input remains.
 * NOTE(review): the signature line is missing from this listing; call sites
 * invoke it as get_available_samples(s) and use the result as an int.
 */
325 {
326  int i;
327  int available_samples = INT_MAX;
328 
329  av_assert0(s->nb_inputs > 1);
330 
/* Starts at index 1: the first input is not considered here. */
331  for (i = 1; i < s->nb_inputs; i++) {
332  int nb_samples;
333  if (s->input_state[i] == INPUT_OFF)
334  continue;
335  nb_samples = av_audio_fifo_size(s->fifos[i]);
336  available_samples = FFMIN(available_samples, nb_samples);
337  }
/* INT_MAX still in place means every input from index 1 on was off. */
338  if (available_samples == INT_MAX)
339  return 0;
340  return available_samples;
341 }
342 
346 static int request_samples(AVFilterContext *ctx, int min_samples)
347 {
348  MixContext *s = ctx->priv;
349  int i, ret;
350 
351  av_assert0(s->nb_inputs > 1);
352 
353  for (i = 1; i < s->nb_inputs; i++) {
354  ret = 0;
355  if (s->input_state[i] == INPUT_OFF)
356  continue;
357  while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
358  ret = ff_request_frame(ctx->inputs[i]);
359  if (ret == AVERROR_EOF) {
360  if (av_audio_fifo_size(s->fifos[i]) == 0) {
361  s->input_state[i] = INPUT_OFF;
362  continue;
363  }
364  } else if (ret < 0)
365  return ret;
366  }
367  return 0;
368 }
369 
/*
 * Body of the routine that recounts the inputs not in state INPUT_OFF, stores
 * the count in s->active_inputs, and decides whether the output has ended.
 * NOTE(review): the signature line is missing from this listing; call sites
 * invoke it as calc_active_inputs(s) and treat a negative result as an error.
 */
377 {
378  int i;
379  int active_inputs = 0;
380  for (i = 0; i < s->nb_inputs; i++)
381  active_inputs += !!(s->input_state[i] != INPUT_OFF);
382  s->active_inputs = active_inputs;
383 
/* End of stream when: no input is left; or mode "first" and input 0 is off;
 * or mode "shortest" and at least one input is off. */
384  if (!active_inputs ||
385  (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
386  (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
387  return AVERROR_EOF;
388  return 0;
389 }
390 
/**
 * request_frame callback of the output pad: gathers enough input and emits
 * one mixed frame.
 *
 * @param outlink output link on which a frame is requested
 * @return result of output_frame() on success; AVERROR_EOF when the stream
 *         has ended per the duration mode; AVERROR(EAGAIN) when nothing can
 *         be output yet; or another negative error from an input request
 *
 * NOTE(review): embedded lines 429 and 452 are missing from this listing.
 */
391 static int request_frame(AVFilterLink *outlink)
392 {
393  AVFilterContext *ctx = outlink->src;
394  MixContext *s = ctx->priv;
395  int ret;
396  int wanted_samples, available_samples;
397 
398  ret = calc_active_inputs(s);
399  if (ret < 0)
400  return ret;
401 
/* First input already finished: output is driven by the remaining inputs
 * alone, using whatever amount all of them have buffered. */
402  if (s->input_state[0] == INPUT_OFF) {
403  ret = request_samples(ctx, 1);
404  if (ret < 0)
405  return ret;
406 
407  ret = calc_active_inputs(s);
408  if (ret < 0)
409  return ret;
410 
411  available_samples = get_available_samples(s);
412  if (!available_samples)
413  return AVERROR(EAGAIN);
414 
415  return output_frame(outlink, available_samples);
416  }
417 
/* No frame of the first input is queued: request one. On EOF the first
 * input is marked off; with a single input that ends the stream, otherwise
 * the caller is told to try again. */
418  if (s->frame_list->nb_frames == 0) {
419  ret = ff_request_frame(ctx->inputs[0]);
420  if (ret == AVERROR_EOF) {
421  s->input_state[0] = INPUT_OFF;
422  if (s->nb_inputs == 1)
423  return AVERROR_EOF;
424  else
425  return AVERROR(EAGAIN);
426  } else if (ret < 0)
427  return ret;
428  }
430 
/* The output frame size follows the next queued frame of the first input. */
431  wanted_samples = frame_list_next_frame_size(s->frame_list);
432 
433  if (s->active_inputs > 1) {
434  ret = request_samples(ctx, wanted_samples);
435  if (ret < 0)
436  return ret;
437 
438  ret = calc_active_inputs(s);
439  if (ret < 0)
440  return ret;
441  }
442 
/* active_inputs is re-tested because the call above may have lowered it. */
443  if (s->active_inputs > 1) {
444  available_samples = get_available_samples(s);
445  if (!available_samples)
446  return AVERROR(EAGAIN);
447  available_samples = FFMIN(available_samples, wanted_samples);
448  } else {
449  available_samples = wanted_samples;
450  }
451 
/* Consume the bookkeeping for the samples about to be output. */
453  frame_list_remove_samples(s->frame_list, available_samples);
454 
455  return output_frame(outlink, available_samples);
456 }
457 
/**
 * Receive one buffer on an input link and store its samples in that input's
 * FIFO.
 *
 * @param inlink input link the buffer arrived on
 * @param buf    incoming audio buffer
 * @return result of av_audio_fifo_write() on success, AVERROR(EINVAL) if
 *         inlink is not one of the filter's inputs, or the error from
 *         frame_list_add_frame()
 *
 * NOTE(review): embedded line 486 (directly after the fail label) is missing
 * from this listing; nothing visible here releases buf.
 */
458 static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf)
459 {
460  AVFilterContext *ctx = inlink->dst;
461  MixContext *s = ctx->priv;
462  AVFilterLink *outlink = ctx->outputs[0];
463  int i, ret = 0;
464 
/* Find the index of the input this link belongs to. */
465  for (i = 0; i < ctx->nb_inputs; i++)
466  if (ctx->inputs[i] == inlink)
467  break;
468  if (i >= ctx->nb_inputs) {
469  av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
470  ret = AVERROR(EINVAL);
471  goto fail;
472  }
473 
/* Only the first input records frame sizes and timestamps; the pts is
 * rescaled into the output time base before being queued. */
474  if (i == 0) {
475  int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
476  outlink->time_base);
477  ret = frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
478  if (ret < 0)
479  goto fail;
480  }
481 
482  ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
483  buf->audio->nb_samples);
484 
485 fail:
487 
488  return ret;
489 }
490 
/**
 * Filter init callback: parses the option string and creates one audio input
 * pad per requested input.
 *
 * @param ctx  filter context
 * @param args option string, parsed with "=" between key and value and ":"
 *             between pairs
 * @return 0 on success, or the negative error from av_set_options_string()
 *
 * NOTE(review): embedded lines 497 and 512 are missing from this listing.
 */
491 static int init(AVFilterContext *ctx, const char *args)
492 {
493  MixContext *s = ctx->priv;
494  int i, ret;
495 
496  s->class = &amix_class;
498 
499  if ((ret = av_set_options_string(s, args, "=", ":")) < 0) {
500  av_log(ctx, AV_LOG_ERROR, "Error parsing options string '%s'.\n", args);
501  return ret;
502  }
503  av_opt_free(s);
504 
/* Pads are named "input0", "input1", ...; the names are heap copies that
 * uninit() frees.
 * NOTE(review): the av_strdup() result is not checked for NULL and the
 * return value of ff_insert_inpad() is ignored in the visible code. */
505  for (i = 0; i < s->nb_inputs; i++) {
506  char name[32];
507  AVFilterPad pad = { 0 };
508 
509  snprintf(name, sizeof(name), "input%d", i);
510  pad.type = AVMEDIA_TYPE_AUDIO;
511  pad.name = av_strdup(name);
513 
514  ff_insert_inpad(ctx, i, &pad);
515  }
516 
517  avpriv_float_dsp_init(&s->fdsp, 0);
518 
519  return 0;
520 }
521 
/**
 * Filter uninit callback: releases the FIFOs, the frame list, the per-input
 * state arrays and the pad names allocated in init().
 *
 * @param ctx filter context
 *
 * NOTE(review): embedded line 532 is missing from this listing; the visible
 * code frees the FrameList structure itself but shows no call that frees the
 * nodes queued in it.
 */
522 static void uninit(AVFilterContext *ctx)
523 {
524  int i;
525  MixContext *s = ctx->priv;
526 
/* fifos may be NULL if configuration never ran or failed early. */
527  if (s->fifos) {
528  for (i = 0; i < s->nb_inputs; i++)
529  av_audio_fifo_free(s->fifos[i]);
530  av_freep(&s->fifos);
531  }
533  av_freep(&s->frame_list);
534  av_freep(&s->input_state);
535  av_freep(&s->input_scale);
536 
/* Pad names were duplicated with av_strdup() in init(). */
537  for (i = 0; i < ctx->nb_inputs; i++)
538  av_freep(&ctx->input_pads[i].name);
539 }
540 
/*
 * Body of the routine that sets AV_SAMPLE_FMT_FLT as the only entry added to
 * the common format list.
 * NOTE(review): the signature line, the declaration of `formats` and several
 * other lines are missing from this listing (embedded numbering skips 541,
 * 543, 545, 547-548), so the function name and any channel-layout or
 * sample-rate handling cannot be confirmed from here.
 */
542 {
544  ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
546  ff_set_common_formats(ctx, formats);
549  return 0;
550 }
551 
/*
 * Output pad table: a single audio pad named "default", configured by
 * config_output() and served by request_frame(), followed by a { NULL }
 * terminator.
 * NOTE(review): the declaration line of this array is missing from this
 * listing; the filter definition refers to it as avfilter_af_amix_outputs.
 */
553  {
554  .name = "default",
555  .type = AVMEDIA_TYPE_AUDIO,
556  .config_props = config_output,
557  .request_frame = request_frame
558  },
559  { NULL }
560 };
561 
/*
 * Filter definition for "amix".
 * NOTE(review): the declaration line of this object and embedded line 569
 * are missing from this listing.
 */
563  .name = "amix",
564  .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
565  .priv_size = sizeof(MixContext),
566 
567  .init = init,
568  .uninit = uninit,
570 
/* No static input pads: init() inserts them according to the "inputs" option. */
571  .inputs = NULL,
572  .outputs = avfilter_af_amix_outputs,
573 };