Panda3D
ffmpegAudioCursor.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file ffmpegAudioCursor.cxx
10  * @author jyelon
11  * @date 2007-08-01
12  */
13 
14 #include "config_ffmpeg.h"
15 #include "ffmpegAudioCursor.h"
16 
17 #include "ffmpegAudio.h"
18 extern "C" {
19  #include <libavutil/dict.h>
20  #include <libavutil/opt.h>
21  #include <libavcodec/avcodec.h>
22  #include <libavformat/avformat.h>
23 }
24 
25 #ifdef HAVE_SWRESAMPLE
26 extern "C" {
27  #include <libswresample/swresample.h>
28 }
29 #endif
30 
31 TypeHandle FfmpegAudioCursor::_type_handle;
32 
33 #if LIBAVFORMAT_VERSION_MAJOR < 53
34  #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
35 #endif
36 
37 #ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE
38 // More recent versions of ffmpeg no longer define this.
39 #define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
40 #endif
41 
42 /**
43  * xxx
44  */
47  MovieAudioCursor(src),
48  _filename(src->_filename),
49  _packet(nullptr),
50  _packet_data(nullptr),
51  _format_ctx(nullptr),
52  _audio_ctx(nullptr),
53  _resample_ctx(nullptr),
54  _buffer(nullptr),
55  _buffer_alloc(nullptr),
56  _frame(nullptr)
57 {
58  if (!_ffvfile.open_vfs(_filename)) {
59  cleanup();
60  return;
61  }
62 
63  _format_ctx = _ffvfile.get_format_context();
64  nassertv(_format_ctx != nullptr);
65 
66  if (avformat_find_stream_info(_format_ctx, nullptr) < 0) {
67  cleanup();
68  return;
69  }
70 
71  // As of libavformat version 57.41.100, AVStream.codec is deprecated in favor
72  // of AVStream.codecpar. Fortunately, the two structures have
73  // similarly-named members, so we can just switch out the declaration.
74 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
75  AVCodecParameters *codecpar;
76 #else
77  AVCodecContext *codecpar;
78 #endif
79 
80  // Find the audio stream
81  AVStream *stream = nullptr;
82  for (int i = 0; i < (int)_format_ctx->nb_streams; i++) {
83 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
84  codecpar = _format_ctx->streams[i]->codecpar;
85 #else
86  codecpar = _format_ctx->streams[i]->codec;
87 #endif
88  if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
89  _audio_index = i;
90  stream = _format_ctx->streams[i];
91  break;
92  }
93  }
94 
95  if (stream == nullptr) {
96  cleanup();
97  return;
98  }
99 
100  _audio_timebase = av_q2d(stream->time_base);
101  _audio_rate = codecpar->sample_rate;
102  _audio_channels = codecpar->channels;
103 
104  const AVCodec *pAudioCodec = avcodec_find_decoder(codecpar->codec_id);
105  if (pAudioCodec == nullptr) {
106  cleanup();
107  return;
108  }
109 
110  _audio_ctx = avcodec_alloc_context3(pAudioCodec);
111 
112  if (_audio_ctx == nullptr) {
113  cleanup();
114  return;
115  }
116 
117 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
118  avcodec_parameters_to_context(_audio_ctx, codecpar);
119 #else
120  avcodec_copy_context(_audio_ctx, codecpar);
121 #endif
122 
123  AVDictionary *opts = nullptr;
124  av_dict_set(&opts, "request_sample_fmt", "s16", 0);
125  if (avcodec_open2(_audio_ctx, pAudioCodec, nullptr) < 0) {
126  cleanup();
127  return;
128  }
129 
130  av_dict_free(&opts);
131 
132  // Set up the resample context if necessary.
133  if (_audio_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
134 #ifdef HAVE_SWRESAMPLE
135  if (ffmpeg_cat.is_debug()) {
136  ffmpeg_cat.debug()
137  << "Codec does not use signed 16-bit sample format. Setting up swresample context.\n";
138  }
139 
140  _resample_ctx = swr_alloc();
141  av_opt_set_int(_resample_ctx, "in_channel_count", _audio_channels, 0);
142  av_opt_set_int(_resample_ctx, "out_channel_count", _audio_channels, 0);
143  av_opt_set_int(_resample_ctx, "in_channel_layout", _audio_ctx->channel_layout, 0);
144  av_opt_set_int(_resample_ctx, "out_channel_layout", _audio_ctx->channel_layout, 0);
145  av_opt_set_int(_resample_ctx, "in_sample_rate", _audio_ctx->sample_rate, 0);
146  av_opt_set_int(_resample_ctx, "out_sample_rate", _audio_ctx->sample_rate, 0);
147  av_opt_set_sample_fmt(_resample_ctx, "in_sample_fmt", _audio_ctx->sample_fmt, 0);
148  av_opt_set_sample_fmt(_resample_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
149 
150  if (swr_init(_resample_ctx) != 0) {
151  ffmpeg_cat.error()
152  << "Failed to set up resample context.\n";
153  _resample_ctx = nullptr;
154  }
155 #else
156  ffmpeg_cat.error()
157  << "Codec does not use signed 16-bit sample format, but support for libswresample has not been enabled.\n";
158 #endif
159  }
160 
161  _length = (_format_ctx->duration * 1.0) / AV_TIME_BASE;
162  _can_seek = true;
163  _can_seek_fast = true;
164 
165 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
166  _frame = av_frame_alloc();
167 #else
168  _frame = avcodec_alloc_frame();
169 #endif
170 
171 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
172  _packet = av_packet_alloc();
173 #else
174  _packet = new AVPacket;
175 #endif
176 
177  _buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / 2;
178  _buffer_alloc = new int16_t[_buffer_size + 64];
179 
180  // Allocate enough space for 1024 samples per channel.
181  if ((_packet == nullptr)||(_buffer_alloc == nullptr)) {
182  cleanup();
183  return;
184  }
185  memset(_packet, 0, sizeof(AVPacket));
186 
187  // Align the buffer to a 64-byte boundary The ffmpeg codec likes this,
188  // because it uses SSESSE2.
189  _buffer = _buffer_alloc;
190  while (((size_t)_buffer) & 31) {
191  _buffer += 1;
192  }
193 
194  fetch_packet();
195  _initial_dts = _packet->dts;
196  _last_seek = 0;
197  _samples_read = 0;
198  _buffer_head = 0;
199  _buffer_tail = 0;
200 }
201 
202 /**
203  * xxx
204  */
207  cleanup();
208 }
209 
210 /**
211  * Reset to a standard inactive state.
212  */
213 void FfmpegAudioCursor::
214 cleanup() {
215  if (_audio_ctx && _audio_ctx->codec) {
216 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
217  // We need to drain the codec to prevent a memory leak.
218  avcodec_send_packet(_audio_ctx, nullptr);
219  while (avcodec_receive_frame(_audio_ctx, _frame) == 0) {}
220  avcodec_flush_buffers(_audio_ctx);
221 #endif
222 
223  avcodec_close(_audio_ctx);
224 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 52, 0)
225  avcodec_free_context(&_audio_ctx);
226 #else
227  av_free(_audio_ctx);
228 #endif
229  }
230  _audio_ctx = nullptr;
231 
232  if (_frame) {
233 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
234  av_frame_free(&_frame);
235 #else
236  avcodec_free_frame(&_frame);
237 #endif
238  _frame = nullptr;
239  }
240 
241  if (_packet) {
242 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
243  av_packet_free(&_packet);
244 #else
245  if (_packet->data) {
246  av_free_packet(_packet);
247  }
248  delete _packet;
249  _packet = nullptr;
250 #endif
251  }
252 
253  if (_buffer_alloc) {
254  delete[] _buffer_alloc;
255  _buffer_alloc = nullptr;
256  _buffer = nullptr;
257  }
258 
259  if (_format_ctx) {
260  _ffvfile.close();
261  _format_ctx = nullptr;
262  }
263 
264 #ifdef HAVE_SWRESAMPLE
265  if (_resample_ctx) {
266  swr_free(&_resample_ctx);
267  _resample_ctx = nullptr;
268  }
269 #endif
270 
271  _audio_index = -1;
272 }
273 
274 /**
275  * Fetches an audio packet and stores it in the packet buffer. Also sets
276  * packet_size and packet_data.
277  */
278 void FfmpegAudioCursor::
279 fetch_packet() {
280  if (_packet->data) {
281 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
282  av_packet_unref(_packet);
283 #else
284  av_free_packet(_packet);
285 #endif
286  }
287  while (av_read_frame(_format_ctx, _packet) >= 0) {
288  if (_packet->stream_index == _audio_index) {
289  _packet_size = _packet->size;
290  _packet_data = _packet->data;
291  return;
292  }
293 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
294  av_packet_unref(_packet);
295 #else
296  av_free_packet(_packet);
297 #endif
298  }
299  _packet->data = nullptr;
300  _packet_size = 0;
301  _packet_data = nullptr;
302 }
303 
304 /**
305  * Reloads the audio buffer by decoding audio packets until one of those audio
306  * packets finally yields some samples. If we encounter the end of the
307  * stream, we synthesize silence.
308  */
309 bool FfmpegAudioCursor::
310 reload_buffer() {
311 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
312  // lavc >= 57.37.100 deprecates the old (avcodec_decode_audio*) API in favor
313  // of a newer, asynchronous API. This is great for our purposes - it gives
314  // the codec the opportunity to decode in the background (e.g. in another
315  // thread or on a dedicated hardware coprocessor).
316 
317  // First, let's fill the codec's input buffer with as many packets as it'll
318  // take:
319  int ret = 0;
320  while (_packet->data != nullptr) {
321  ret = avcodec_send_packet(_audio_ctx, _packet);
322 
323  if (ret != 0) {
324  // Nonzero return code is an error.
325  break;
326  }
327 
328  // If we got here, the codec took the packet! Fetch another one.
329  fetch_packet();
330  if (_packet->data == nullptr) {
331  // fetch_packet() says we're out of packets. Let the codec know.
332  ret = avcodec_send_packet(_audio_ctx, nullptr);
333  }
334  }
335 
336  // Expected ret codes are 0 (we ran out of packets) and EAGAIN (codec full)
337  if ((ret != 0) && (ret != AVERROR(EAGAIN))) {
338  // Some odd error happened. We can't proceed.
339  ffmpeg_cat.error()
340  << "avcodec_send_packet returned " << ret << "\n";
341  return false;
342  }
343 
344  // Now we retrieve our frame!
345  ret = avcodec_receive_frame(_audio_ctx, _frame);
346 
347  if (ret == AVERROR_EOF) {
348  // The only way for this to happen is if we're out of packets.
349  nassertr(_packet->data == nullptr, false);
350 
351  // Synthesize silence:
352  _buffer_head = 0;
353  _buffer_tail = _buffer_size;
354  memset(_buffer, 0, _buffer_size * 2);
355  return true;
356 
357  } else if (ret != 0) {
358  // Some odd error happened. We can't proceed.
359  ffmpeg_cat.error()
360  << "avcodec_receive_frame returned " << ret << "\n";
361  return false;
362  }
363 
364  // We now have _frame. It will be handled below.
365 
366 #else
367  int got_frame = 0;
368  while (!got_frame) {
369  // If we're out of packets, generate silence.
370  if (_packet->data == nullptr) {
371  _buffer_head = 0;
372  _buffer_tail = _buffer_size;
373  memset(_buffer, 0, _buffer_size * 2);
374  return true;
375  } else if (_packet_size == 0) {
376  fetch_packet();
377  }
378 
379  AVPacket *pkt;
380 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
381  pkt = av_packet_alloc();
382 #else
383  AVPacket _pkt;
384  pkt = &_pkt;
385  av_init_packet(pkt);
386 #endif
387  pkt->data = _packet_data;
388  pkt->size = _packet_size;
389 
390  int len = avcodec_decode_audio4(_audio_ctx, _frame, &got_frame, pkt);
391  movies_debug("avcodec_decode_audio4 returned " << len);
392 
393 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
394  av_packet_free(&pkt);
395 #else
396  av_free_packet(pkt);
397 #endif
398 
399  if (len < 0) {
400  return false;
401  } else if (len == 0) {
402  return true;
403  }
404  _packet_data += len;
405  _packet_size -= len;
406  }
407 #endif
408 
409  int bufsize;
410 #ifdef HAVE_SWRESAMPLE
411  if (_resample_ctx) {
412  // Resample the data to signed 16-bit sample format.
413  bufsize = swr_convert(_resample_ctx, (uint8_t **)&_buffer, _buffer_size / 2, (const uint8_t**)_frame->extended_data, _frame->nb_samples);
414  bufsize *= _audio_channels * 2;
415  } else
416 #endif
417  {
418  bufsize = _frame->linesize[0];
419  memcpy(_buffer, _frame->data[0], bufsize);
420  }
421 #if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(52, 19, 100)
422  av_frame_unref(_frame);
423 #endif
424 
425  if (bufsize > 0) {
426  _buffer_head = 0;
427  _buffer_tail = (bufsize/2);
428  return true;
429  }
430  return true;
431 }
432 
433 /**
434  * Seeks to a target location. Afterward, the packet_time is guaranteed to be
435  * less than or equal to the specified time.
436  */
438 seek(double t) {
439  int64_t target_ts = (int64_t)(t / _audio_timebase);
440  if (target_ts < (int64_t)(_initial_dts)) {
441  // Attempts to seek before the first packet will fail.
442  target_ts = _initial_dts;
443  }
444  if (av_seek_frame(_format_ctx, _audio_index, target_ts, AVSEEK_FLAG_BACKWARD) < 0) {
445  ffmpeg_cat.error() << "Seek failure. Shutting down movie.\n";
446  cleanup();
447  return;
448  }
449  avcodec_flush_buffers(_audio_ctx);
450  _buffer_head = 0;
451  _buffer_tail = 0;
452  fetch_packet();
453  double ts = _packet->dts * _audio_timebase;
454  if (t > ts) {
455  int skip = (int)((t-ts) * _audio_rate);
456  read_samples(skip, nullptr);
457  }
458  _last_seek = t;
459  _samples_read = 0;
460 }
461 
462 /**
463  * Read audio samples from the stream. N is the number of samples you wish to
464  * read. Your buffer must be equal in size to N * channels. Multiple-channel
465  * audio will be interleaved.
466  */
468 read_samples(int n, int16_t *data) {
469  int desired = n * _audio_channels;
470 
471  while (desired > 0) {
472  if (_buffer_head == _buffer_tail) {
473  if(!reload_buffer()){
474  break;
475  }
476  movies_debug("read_samples() desired samples: " << desired << " N:" << n);
477  }
478  int available = _buffer_tail - _buffer_head;
479  int ncopy = (desired > available) ? available : desired;
480  if (ncopy) {
481  if (data != nullptr) {
482  memcpy(data, _buffer + _buffer_head, ncopy * 2);
483  data += ncopy;
484  }
485  desired -= ncopy;
486  _buffer_head += ncopy;
487  }
488 
489  }
490  _samples_read += n;
491 }
virtual void seek(double offset)
Seeks to a target location.
FfmpegAudioCursor(FfmpegAudio *src)
Opens the source's file, selects its first audio stream and prepares a decoder for it.
virtual void read_samples(int n, int16_t *data)
Read audio samples from the stream.
virtual ~FfmpegAudioCursor()
Releases all resources held by the cursor via cleanup().
A stream that generates a sequence of audio samples.
Definition: ffmpegAudio.h:25
bool open_vfs(const Filename &filename)
Opens the movie file via Panda's VFS.
AVFormatContext * get_format_context() const
Returns a pointer to the opened ffmpeg context, or NULL if the file was not successfully opened.
void close()
Explicitly closes the opened file.
A MovieAudio is actually any source that provides a sequence of audio samples.
TypeHandle is the identifier used to differentiate C++ class types.
Definition: typeHandle.h:81
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.