xmv.c
Go to the documentation of this file.
1 /*
2  * Microsoft XMV demuxer
3  * Copyright (c) 2011 Sven Hesse <drmccoy@drmccoy.de>
4  * Copyright (c) 2011 Matthew Hoops <clone2727@gmail.com>
5  *
6  * This file is part of Libav.
7  *
8  * Libav is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * Libav is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with Libav; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
28 #include <stdint.h>
29 
30 #include "libavutil/intreadwrite.h"
31 
32 #include "avformat.h"
33 #include "internal.h"
34 #include "riff.h"
35 
/* Smallest probe buffer xmv_probe() accepts before it inspects any field. */
36 #define XMV_MIN_HEADER_SIZE 36
37 
/*
 * Bits tested in an audio track's flags field. The names suggest each bit
 * marks which channel pair of a 5.1 ADPCM layout the track carries
 * (presumably; only the combined mask is used in the visible code).
 */
38 #define XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT 1
39 #define XMV_AUDIO_ADPCM51_FRONTCENTERLOW 2
40 #define XMV_AUDIO_ADPCM51_REARLEFTRIGHT 4
41 
/* Union of the three bits above; any of them set triggers the
 * "Unsupported 5.1 ADPCM audio stream" warning while parsing the header. */
42 #define XMV_AUDIO_ADPCM51 (XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT | \
43  XMV_AUDIO_ADPCM51_FRONTCENTERLOW | \
44  XMV_AUDIO_ADPCM51_REARLEFTRIGHT)
45 
/* Per-channel factor: a track's block_align is this value times its channel
 * count, and the channel-count sanity check is derived from it. */
46 #define XMV_BLOCK_ALIGN_SIZE 36
47 
/*
 * Parameters of one audio track, filled in while the file header is parsed.
 * NOTE(review): the end of this struct is not visible in this listing; a
 * codec_id member is assigned elsewhere in the file (track->codec_id) but
 * does not appear here.
 */
48 typedef struct XMVAudioTrack {
    /* 16-bit value read from the header; passed to ff_wav_codec_get_id()
     * and exported as the stream's codec_tag. */
49  uint16_t compression;
    /* Channel count read from the header; zero is rejected as invalid. */
50  uint16_t channels;
    /* Sample rate read from the header; zero is rejected as invalid. */
51  uint32_t sample_rate;
    /* Bits per sample read from the header. */
52  uint16_t bits_per_sample;
    /* Derived value: bits_per_sample * sample_rate * channels. */
53  uint32_t bit_rate;
    /* Flags read from the header; tested against XMV_AUDIO_ADPCM51. */
54  uint16_t flags;
    /* Derived value: XMV_BLOCK_ALIGN_SIZE * channels. */
55  uint16_t block_align;
    /* Set to 64 when the header is parsed; used as the numerator of the
     * audio stream's time base. */
56  uint16_t block_samples;
57 
60 
/*
 * State of the video part of the packet currently being demuxed.
 * NOTE(review): the stream_index and has_extradata members used elsewhere
 * in the file, and the end of the struct, are not visible in this listing
 * (only their comments remain).
 */
61 typedef struct XMVVideoPacket {
62  /* The decoder stream index for this video packet. */
64 
    /* Video bytes still unread in the current packet; shrinks as each
     * frame is fetched. */
65  uint32_t data_size;
    /* File offset of the next unread video data. */
66  uint32_t data_offset;
67 
    /* Index of the frame being handed out; reset to 0 for every packet and
     * incremented after all streams were served once. */
68  uint32_t current_frame;
    /* Frame count taken from the packet header (forced to 1 when the
     * header says 0). */
69  uint32_t frame_count;
70 
71  /* Does the video packet contain extra data? */
73 
74  /* Extra data */
    /* Four bytes that are copied into the video stream's codec extradata. */
75  uint8_t extradata[4];
76 
    /* PTS assigned to the most recently fetched video frame. */
77  int64_t last_pts;
    /* Running sum of the per-frame timestamps read from the frame headers. */
78  int64_t pts;
80 
/*
 * State of one audio track's part of the packet currently being demuxed.
 * NOTE(review): the stream_index and track members used elsewhere in the
 * file, and the end of the struct, are not visible in this listing (only
 * their comments remain).
 */
81 typedef struct XMVAudioPacket {
82  /* The decoder stream index for this audio packet. */
84 
85  /* The audio track this packet encodes. */
87 
    /* Audio bytes of this track still unread in the current packet. */
88  uint32_t data_size;
    /* File offset of the next unread audio data of this track. */
89  uint32_t data_offset;
90 
    /* Bytes handed out per video frame: data_size / frame_count, rounded
     * down to a multiple of the track's block_align. */
91  uint32_t frame_size;
92 
    /* Running total of blocks emitted so far; used as the PTS of the next
     * audio packet. */
93  uint32_t block_count;
95 
/*
 * Demuxer private context (accessed through s->priv_data).
 * NOTE(review): most members referenced elsewhere in the file
 * (audio_track_count, audio_tracks, audio, video, this_packet_size,
 * this_packet_offset, next_packet_size, next_packet_offset) and the end of
 * the struct are not visible in this listing.
 */
96 typedef struct XMVDemuxContext {
98 
100 
103 
106 
    /* Stream to serve next: 0 selects the video frame, n > 0 selects audio
     * track n - 1. */
107  uint16_t current_stream;
    /* Set to audio_track_count + 1 once the header has been parsed. */
108  uint16_t stream_count;
109 
113 
114 static int xmv_probe(AVProbeData *p)
115 {
116  uint32_t file_version;
117 
118  if (p->buf_size < XMV_MIN_HEADER_SIZE)
119  return 0;
120 
121  file_version = AV_RL32(p->buf + 16);
122  if ((file_version == 0) || (file_version > 4))
123  return 0;
124 
125  if (!memcmp(p->buf + 12, "xobX", 4))
126  return AVPROBE_SCORE_MAX;
127 
128  return 0;
129 }
130 
/*
 * Body of the close handler. NOTE(review): its signature line is not visible
 * in this listing; the header-parsing error path calls it as
 * xmv_read_close(s).
 * Releases the two per-track arrays held in the private context and always
 * returns 0.
 * NOTE(review): the pointers are not reset after av_free(); confirm nothing
 * can invoke this twice on the same context.
 */
132 {
133  XMVDemuxContext *xmv = s->priv_data;
134 
135  av_free(xmv->audio);
136  av_free(xmv->audio_tracks);
137 
138  return 0;
139 }
140 
/*
 * Remainder of the header-reading function (signature tail and body).
 * NOTE(review): the first line of the signature (gutter line 141) is not
 * visible in this listing, and the gutter also skips 173, 244, 249 and
 * 261-262 inside the body, so some statements are presumably missing here.
 *
 * Visible behavior: reads the fixed file header, creates one video stream,
 * allocates the per-track arrays, creates one stream per audio track and
 * initializes the next-packet bookkeeping.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation/stream-creation
 * failure, AVERROR_INVALIDDATA on bad audio parameters. Failures after the
 * first array allocation go through the fail label, which frees the arrays
 * via xmv_read_close().
 */
142  AVFormatParameters *ap)
143 {
144  XMVDemuxContext *xmv = s->priv_data;
145  AVIOContext *pb = s->pb;
146  AVStream *vst = NULL;
147 
148  uint32_t file_version;
149  uint32_t this_packet_size;
150  uint16_t audio_track;
151  int ret;
152 
153  avio_skip(pb, 4); /* Next packet size */
154 
155  this_packet_size = avio_rl32(pb);
156 
157  avio_skip(pb, 4); /* Max packet size */
158  avio_skip(pb, 4); /* "xobX" */
159 
    /* Versions other than 2 and 4 are only reported, not rejected. */
160  file_version = avio_rl32(pb);
161  if ((file_version != 4) && (file_version != 2))
162  av_log_ask_for_sample(s, "Found uncommon version %d\n", file_version);
163 
164 
165  /* Video track */
166 
167  vst = avformat_new_stream(s, NULL);
168  if (!vst)
169  return AVERROR(ENOMEM);
170 
    /* 32-bit timestamps with a time base of 1/1000. */
171  avpriv_set_pts_info(vst, 32, 1, 1000);
172 
    /* Width, height and duration are consecutive 32-bit little-endian
     * header fields. */
174  vst->codec->codec_id = CODEC_ID_WMV2;
175  vst->codec->codec_tag = MKBETAG('W', 'M', 'V', '2');
176  vst->codec->width = avio_rl32(pb);
177  vst->codec->height = avio_rl32(pb);
178 
179  vst->duration = avio_rl32(pb);
180 
181  xmv->video.stream_index = vst->index;
182 
183  /* Audio tracks */
184 
185  xmv->audio_track_count = avio_rl16(pb);
186 
187  avio_skip(pb, 2); /* Unknown (padding?) */
188 
    /* One XMVAudioTrack and one XMVAudioPacket per track. Nothing has been
     * allocated yet when the first allocation fails, hence the plain
     * return; later failures must free through the fail label. */
189  xmv->audio_tracks = av_malloc(xmv->audio_track_count * sizeof(XMVAudioTrack));
190  if (!xmv->audio_tracks)
191  return AVERROR(ENOMEM);
192 
193  xmv->audio = av_malloc(xmv->audio_track_count * sizeof(XMVAudioPacket));
194  if (!xmv->audio) {
195  ret = AVERROR(ENOMEM);
196  goto fail;
197  }
198 
199  for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
200  XMVAudioTrack *track = &xmv->audio_tracks[audio_track];
201  XMVAudioPacket *packet = &xmv->audio [audio_track];
202  AVStream *ast = NULL;
203 
    /* Per-track header fields, read in file order. */
204  track->compression = avio_rl16(pb);
205  track->channels = avio_rl16(pb);
206  track->sample_rate = avio_rl32(pb);
207  track->bits_per_sample = avio_rl16(pb);
208  track->flags = avio_rl16(pb);
209 
    /* Derived values. They are computed before the sanity check below, so
     * for a track that is about to be rejected they may be meaningless
     * (block_align is only 16 bits wide). */
210  track->bit_rate = track->bits_per_sample *
211  track->sample_rate *
212  track->channels;
213  track->block_align = XMV_BLOCK_ALIGN_SIZE * track->channels;
214  track->block_samples = 64;
215  track->codec_id = ff_wav_codec_get_id(track->compression,
216  track->bits_per_sample);
217 
    /* -1 until a stream has actually been created for this track. */
218  packet->track = track;
219  packet->stream_index = -1;
220 
221  packet->frame_size = 0;
222  packet->block_count = 0;
223 
224  /* TODO: ADPCM'd 5.1 sound is encoded in three separate streams.
225  * Those need to be interleaved to a proper 5.1 stream. */
226  if (track->flags & XMV_AUDIO_ADPCM51)
227  av_log(s, AV_LOG_WARNING, "Unsupported 5.1 ADPCM audio stream "
228  "(0x%04X)\n", track->flags);
229 
    /* Reject zero channels / sample rate (both are later used as divisors
     * or time-base terms) and channel counts for which
     * XMV_BLOCK_ALIGN_SIZE * channels would not fit in 16 bits. */
230  if (!track->channels || !track->sample_rate ||
231  track->channels >= UINT16_MAX / XMV_BLOCK_ALIGN_SIZE) {
232  av_log(s, AV_LOG_ERROR, "Invalid parameters for audio track %d.\n",
233  audio_track);
234  ret = AVERROR_INVALIDDATA;
235  goto fail;
236  }
237 
238  ast = avformat_new_stream(s, NULL);
239  if (!ast) {
240  ret = AVERROR(ENOMEM);
241  goto fail;
242  }
243 
245  ast->codec->codec_id = track->codec_id;
246  ast->codec->codec_tag = track->compression;
247  ast->codec->channels = track->channels;
248  ast->codec->sample_rate = track->sample_rate;
250  ast->codec->bit_rate = track->bit_rate;
    /* Same value as track->block_align (XMV_BLOCK_ALIGN_SIZE is 36). */
251  ast->codec->block_align = 36 * track->channels;
252 
    /* Time base block_samples / sample_rate: audio timestamps count
     * blocks. */
253  avpriv_set_pts_info(ast, 32, track->block_samples, track->sample_rate);
254 
255  packet->stream_index = ast->index;
256 
257  ast->duration = vst->duration;
258  }
259 
260 
    /* The data following the header belongs to the first packet: it starts
     * at the current position and its size is the header's packet size
     * minus that position.
     * NOTE(review): this_packet_size is unsigned, so the subtraction wraps
     * if the stored size is smaller than the bytes consumed so far. */
263  xmv->next_packet_offset = avio_tell(pb);
264  xmv->next_packet_size = this_packet_size - xmv->next_packet_offset;
265  xmv->stream_count = xmv->audio_track_count + 1;
266 
267  return 0;
268 
269 fail:
270  xmv_read_close(s);
271  return ret;
272 }
273 
274 static void xmv_read_extradata(uint8_t *extradata, AVIOContext *pb)
275 {
276  /* Read the XMV extradata */
277 
278  uint32_t data = avio_rl32(pb);
279 
280  int mspel_bit = !!(data & 0x01);
281  int loop_filter = !!(data & 0x02);
282  int abt_flag = !!(data & 0x04);
283  int j_type_bit = !!(data & 0x08);
284  int top_left_mv_flag = !!(data & 0x10);
285  int per_mb_rl_bit = !!(data & 0x20);
286  int slice_count = (data >> 6) & 7;
287 
288  /* Write it back as standard WMV2 extradata */
289 
290  data = 0;
291 
292  data |= mspel_bit << 15;
293  data |= loop_filter << 14;
294  data |= abt_flag << 13;
295  data |= j_type_bit << 12;
296  data |= top_left_mv_flag << 11;
297  data |= per_mb_rl_bit << 10;
298  data |= slice_count << 7;
299 
300  AV_WB32(extradata, data);
301 }
302 
/*
 * Body of the packet-header parser, called as xmv_process_packet_header(s).
 * NOTE(review): its signature line is not visible in this listing, and the
 * gutter skips 361, 383 and 397 inside the body, so some statements are
 * presumably missing here.
 *
 * Visible behavior: reads the next-packet size, the 8-byte video header and
 * one 4-byte header per audio track, then computes where the video data and
 * each track's audio data start, and finally refreshes the video stream's
 * extradata when the packet carries any.
 * Returns 0 on success or AVERROR(EIO) on a short read.
 */
304 {
305  XMVDemuxContext *xmv = s->priv_data;
306  AVIOContext *pb = s->pb;
307 
308  uint8_t data[8];
309  uint16_t audio_track;
310  uint32_t data_offset;
311 
312  /* Next packet size */
313  xmv->next_packet_size = avio_rl32(pb);
314 
315  /* Packet video header */
316 
317  if (avio_read(pb, data, 8) != 8)
318  return AVERROR(EIO);
319 
    /* Only the first little-endian 32-bit word of the 8 bytes is decoded
     * here: bits 0..22 = video data size, bits 23..30 = frame count,
     * bit 31 (top bit of data[3]) = extradata present. */
320  xmv->video.data_size = AV_RL32(data) & 0x007FFFFF;
321 
322  xmv->video.current_frame = 0;
323  xmv->video.frame_count = (AV_RL32(data) >> 23) & 0xFF;
324 
325  xmv->video.has_extradata = (data[3] & 0x80) != 0;
326 
327  /* Adding the audio data sizes and the video data size keeps you 4 bytes
328  * short for every audio track. But as playing around with XMV files with
329  * ADPCM audio showed, taking the extra 4 bytes from the audio data gives
330  * you either completely distorted audio or click (when skipping the
331  * remaining 68 bytes of the ADPCM block). Subtracting 4 bytes for every
332  * audio track from the video data works at least for the audio. Probably
333  * some alignment thing?
334  * The video data has (always?) lots of padding, so it should work out...
335  */
    /* NOTE(review): data_size is unsigned; this wraps if the header's video
     * size is smaller than 4 * audio_track_count. */
336  xmv->video.data_size -= xmv->audio_track_count * 4;
337 
    /* A packet announcing zero frames is treated as having one frame, and
     * serving starts at stream 1 so the video fetch is skipped for it. */
338  xmv->current_stream = 0;
339  if (!xmv->video.frame_count) {
340  xmv->video.frame_count = 1;
341  xmv->current_stream = 1;
342  }
343 
344  /* Packet audio header */
345 
346  for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
347  XMVAudioPacket *packet = &xmv->audio[audio_track];
348 
349  if (avio_read(pb, data, 4) != 4)
350  return AVERROR(EIO);
351 
352  packet->data_size = AV_RL32(data) & 0x007FFFFF;
353  if ((packet->data_size == 0) && (audio_track != 0))
354  /* This happens when I create an XMV with several identical audio
355  * streams. From the size calculations, duplicating the previous
356  * stream's size works out, but the track data itself is silent.
357  * Maybe this should also redirect the offset to the previous track?
358  */
359  packet->data_size = xmv->audio[audio_track - 1].data_size;
360 
    /* Spread the track's data evenly over the video frames, in whole
     * blocks; frame_count is at least 1 at this point. */
362  packet->frame_size = packet->data_size / xmv->video.frame_count;
363  packet->frame_size -= packet->frame_size % packet->track->block_align;
364  }
365 
366  /* Packet data offsets */
367 
    /* Layout after the headers: video data first, then each audio track's
     * data back to back in track order. */
368  data_offset = avio_tell(pb);
369 
370  xmv->video.data_offset = data_offset;
371  data_offset += xmv->video.data_size;
372 
373  for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) {
374  xmv->audio[audio_track].data_offset = data_offset;
375  data_offset += xmv->audio[audio_track].data_size;
376  }
377 
378  /* Video frames header */
379 
380  /* Read new video extra data */
381  if (xmv->video.data_size > 0) {
382  if (xmv->video.has_extradata) {
    /* NOTE(review): gutter line 383 is missing from this listing;
     * presumably it reads the 4 extradata bytes into
     * xmv->video.extradata (see xmv_read_extradata()). */
384 
    /* The 4 extradata bytes are counted as part of the video data, so the
     * remaining size and start offset are adjusted past them. */
385  xmv->video.data_size -= 4;
386  xmv->video.data_offset += 4;
387 
388  if (xmv->video.stream_index >= 0) {
389  AVStream *vst = s->streams[xmv->video.stream_index];
390 
    /* NOTE(review): this assert runs after s->streams[] was already
     * indexed on the line above. */
391  assert(xmv->video.stream_index < s->nb_streams);
392 
    /* (Re)allocate the stream's extradata buffer if it cannot hold 4
     * bytes. NOTE(review): gutter line 397 (the right-hand side of the
     * assignment below, presumably the allocation) is missing from this
     * listing. */
393  if (vst->codec->extradata_size < 4) {
394  av_free(vst->codec->extradata);
395 
396  vst->codec->extradata =
398  vst->codec->extradata_size = 4;
399  }
400 
401  memcpy(vst->codec->extradata, xmv->video.extradata, 4);
402  }
403  }
404  }
405 
406  return 0;
407 }
408 
/*
 * Body of the routine that advances to the next packet, called as
 * xmv_fetch_new_packet(s).
 * NOTE(review): its signature line is not visible in this listing, and the
 * gutter skips 416, 421 and 431; judging by the surviving comments those
 * lines update this_packet_offset, this_packet_size and next_packet_offset.
 *
 * Visible behavior: seeks to this_packet_offset, rejects packets too small
 * to hold their own header, then parses the packet header.
 * Returns 0 on success, AVERROR(EIO) on seek failure or undersized packet,
 * or the error returned by xmv_process_packet_header().
 */
410 {
411  XMVDemuxContext *xmv = s->priv_data;
412  AVIOContext *pb = s->pb;
413  int result;
414 
415  /* Seek to it */
417  if (avio_seek(pb, xmv->this_packet_offset, SEEK_SET) != xmv->this_packet_offset)
418  return AVERROR(EIO);
419 
420  /* Update the size */
    /* 12 + 4 per track is exactly what the header parser reads: 4 bytes
     * next-packet size, 8 bytes video header, 4 bytes per audio track. */
422  if (xmv->this_packet_size < (12 + xmv->audio_track_count * 4))
423  return AVERROR(EIO);
424 
425  /* Process the header */
426  result = xmv_process_packet_header(s);
427  if (result)
428  return result;
429 
430  /* Update the offset */
432 
433  return 0;
434 }
435 
/*
 * Signature tail and body of the audio fetch routine, called as
 * xmv_fetch_audio_packet(s, pkt, stream) where stream is a zero-based index
 * into xmv->audio[].
 * NOTE(review): the first line of the signature (gutter line 436) is not
 * visible in this listing.
 *
 * Reads the next slice of the given track's audio data into pkt, stamps it
 * with a block-based PTS and advances the track's read position.
 * Returns 0 on success, AVERROR(EIO) on seek failure, or the result of
 * av_get_packet() when that is <= 0.
 * NOTE(review): a result of exactly 0 from av_get_packet() is returned
 * as-is, which callers treat as success; confirm that is intended.
 */
437  AVPacket *pkt, uint32_t stream)
438 {
439  XMVDemuxContext *xmv = s->priv_data;
440  AVIOContext *pb = s->pb;
441  XMVAudioPacket *audio = &xmv->audio[stream];
442 
443  uint32_t data_size;
444  uint32_t block_count;
445  int result;
446 
447  /* Seek to it */
448  if (avio_seek(pb, audio->data_offset, SEEK_SET) != audio->data_offset)
449  return AVERROR(EIO);
450 
451  if ((xmv->video.current_frame + 1) < xmv->video.frame_count)
452  /* Not the last frame, get at most frame_size bytes. */
453  data_size = FFMIN(audio->frame_size, audio->data_size);
454  else
455  /* Last frame, get the rest. */
456  data_size = audio->data_size;
457 
458  /* Read the packet */
459  result = av_get_packet(pb, pkt, data_size);
460  if (result <= 0)
461  return result;
462 
463  pkt->stream_index = audio->stream_index;
464 
465  /* Calculate the PTS */
466 
    /* Timestamps are counted in blocks: the PTS is the number of blocks
     * emitted before this packet and the duration is the number of whole
     * blocks in it. */
467  block_count = data_size / audio->track->block_align;
468 
469  pkt->duration = block_count;
470  pkt->pts = audio->block_count;
471  pkt->dts = AV_NOPTS_VALUE;
472 
473  audio->block_count += block_count;
474 
475  /* Advance offset */
476  audio->data_size -= data_size;
477  audio->data_offset += data_size;
478 
479  return 0;
480 }
481 
/*
 * Signature tail and body of the video fetch routine, called as
 * xmv_fetch_video_packet(s, pkt).
 * NOTE(review): the first line of the signature (gutter line 482) is not
 * visible in this listing.
 *
 * Reads one video frame from the current packet into pkt, byte-swapping
 * every 32-bit word, and advances the video read position.
 * Returns 0 on success, AVERROR(EIO) on seek failure or when the frame does
 * not fit in the remaining video data, or the error from av_new_packet().
 */
483  AVPacket *pkt)
484 {
485  XMVDemuxContext *xmv = s->priv_data;
486  AVIOContext *pb = s->pb;
487  XMVVideoPacket *video = &xmv->video;
488 
489  int result;
490  uint32_t frame_header;
491  uint32_t frame_size, frame_timestamp;
492  uint32_t i;
493 
494  /* Seek to it */
495  if (avio_seek(pb, video->data_offset, SEEK_SET) != video->data_offset)
496  return AVERROR(EIO);
497 
498  /* Read the frame header */
499  frame_header = avio_rl32(pb);
500 
    /* Low 17 bits: frame length in 32-bit words, minus one (hence * 4 + 4
     * bytes, always a multiple of 4). Upper 15 bits: timestamp increment. */
501  frame_size = (frame_header & 0x1FFFF) * 4 + 4;
502  frame_timestamp = (frame_header >> 17);
503 
    /* The extra 4 accounts for the frame header word itself. */
504  if ((frame_size + 4) > video->data_size)
505  return AVERROR(EIO);
506 
507  /* Create the packet */
508  result = av_new_packet(pkt, frame_size);
509  if (result)
510  return result;
511 
512  /* Contrary to normal WMV2 video, the bit stream in XMV's
513  * WMV2 is little-endian.
514  * TODO: This manual swap is of course suboptimal.
515  */
516  for (i = 0; i < frame_size; i += 4)
517  AV_WB32(pkt->data + i, avio_rl32(pb));
518 
519  pkt->stream_index = video->stream_index;
520 
521  /* Calculate the PTS */
522 
    /* The frame's PTS is the running total including this frame's
     * increment; after the += below, video->pts equals video->last_pts. */
523  video->last_pts = frame_timestamp + video->pts;
524 
525  pkt->duration = 0;
526  pkt->pts = video->last_pts;
527  pkt->dts = AV_NOPTS_VALUE;
528 
529  video->pts += frame_timestamp;
530 
531  /* Keyframe? */
    /* A clear top bit in the first (already swapped) byte marks a key
     * frame. */
532  pkt->flags = (pkt->data[0] & 0x80) ? 0 : AV_PKT_FLAG_KEY;
533 
534  /* Advance offset */
535  video->data_size -= frame_size + 4;
536  video->data_offset += frame_size + 4;
537 
538  return 0;
539 }
540 
/*
 * Signature tail and body of the packet-reading entry point.
 * NOTE(review): the first line of the signature (gutter line 541), and with
 * it the function's name, is not visible in this listing.
 *
 * Hands out one packet per call, cycling through the streams in order:
 * the video frame first (current_stream == 0), then one slice per audio
 * track, before moving on to the next video frame. When every frame of the
 * current packet has been served, the next packet is fetched first.
 * Returns 0 on success or the first non-zero result of a helper.
 */
542  AVPacket *pkt)
543 {
544  XMVDemuxContext *xmv = s->priv_data;
545  int result;
546 
    /* NOTE(review): on the very first call this relies on current_frame
     * and frame_count being equal (presumably both zero-initialized) so
     * that the first packet gets fetched; confirm. */
547  if (xmv->video.current_frame == xmv->video.frame_count) {
548  /* No frames left in this packet, so we fetch a new one */
549 
550  result = xmv_fetch_new_packet(s);
551  if (result)
552  return result;
553  }
554 
555  if (xmv->current_stream == 0) {
556  /* Fetch a video frame */
557 
558  result = xmv_fetch_video_packet(s, pkt);
559  if (result)
560  return result;
561 
562  } else {
563  /* Fetch an audio frame */
564 
    /* Stream n > 0 maps to audio track n - 1. */
565  result = xmv_fetch_audio_packet(s, pkt, xmv->current_stream - 1);
566  if (result)
567  return result;
568  }
569 
570  /* Increase our counters */
    /* After the last stream, wrap around to video and move on to the next
     * frame of the packet. */
571  if (++xmv->current_stream >= xmv->stream_count) {
572  xmv->current_stream = 0;
573  xmv->video.current_frame += 1;
574  }
575 
576  return 0;
577 }
578 
/*
 * Tail of the demuxer descriptor's designated initializer.
 * NOTE(review): the opening line of the definition (gutter line 579) and the
 * members on gutter lines 583-586 (presumably the callback pointers) are
 * not visible in this listing.
 */
580  .name = "xmv",
581  .long_name = NULL_IF_CONFIG_SMALL("Microsoft XMV"),
    /* Size of the private context the framework allocates for s->priv_data. */
582  .priv_data_size = sizeof(XMVDemuxContext),
587 };