Panda3D
ffmpegAudioCursor.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file ffmpegAudioCursor.cxx
10  * @author jyelon
11  * @date 2007-08-01
12  */
13 
14 #include "config_ffmpeg.h"
15 #include "ffmpegAudioCursor.h"
16 
17 #include "ffmpegAudio.h"
18 extern "C" {
19  #include <libavutil/dict.h>
20  #include <libavutil/opt.h>
21  #include <libavcodec/avcodec.h>
22  #include <libavformat/avformat.h>
23 }
24 
25 #ifdef HAVE_SWRESAMPLE
26 extern "C" {
27  #include <libswresample/swresample.h>
28 }
29 #endif
30 
31 TypeHandle FfmpegAudioCursor::_type_handle;
32 
33 #if LIBAVFORMAT_VERSION_MAJOR < 53
34  #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
35 #endif
36 
37 #ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE
38 // More recent versions of ffmpeg no longer define this.
39 #define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
40 #endif
41 
42 /**
43  * xxx
44  */
47  MovieAudioCursor(src),
48  _filename(src->_filename),
49  _packet(nullptr),
50  _packet_data(nullptr),
51  _format_ctx(nullptr),
52  _audio_ctx(nullptr),
53  _resample_ctx(nullptr),
54  _buffer(nullptr),
55  _buffer_alloc(nullptr),
56  _frame(nullptr)
57 {
58  if (!_ffvfile.open_vfs(_filename)) {
59  cleanup();
60  return;
61  }
62 
63  _format_ctx = _ffvfile.get_format_context();
64  nassertv(_format_ctx != nullptr);
65 
66  if (avformat_find_stream_info(_format_ctx, nullptr) < 0) {
67  cleanup();
68  return;
69  }
70 
71  // As of libavformat version 57.41.100, AVStream.codec is deprecated in favor
72  // of AVStream.codecpar. Fortunately, the two structures have
73  // similarly-named members, so we can just switch out the declaration.
74 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
75  AVCodecParameters *codecpar;
76 #else
77  AVCodecContext *codecpar;
78 #endif
79 
80  // Find the audio stream
81  AVStream *stream = nullptr;
82  for (int i = 0; i < (int)_format_ctx->nb_streams; i++) {
83 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
84  codecpar = _format_ctx->streams[i]->codecpar;
85 #else
86  codecpar = _format_ctx->streams[i]->codec;
87 #endif
88  if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
89  _audio_index = i;
90  stream = _format_ctx->streams[i];
91  break;
92  }
93  }
94 
95  if (stream == nullptr) {
96  cleanup();
97  return;
98  }
99 
100  _audio_timebase = av_q2d(stream->time_base);
101  _audio_rate = codecpar->sample_rate;
102  _audio_channels = codecpar->channels;
103 
104  AVCodec *pAudioCodec = avcodec_find_decoder(codecpar->codec_id);
105  if (pAudioCodec == nullptr) {
106  cleanup();
107  return;
108  }
109 
110  _audio_ctx = avcodec_alloc_context3(pAudioCodec);
111 
112  if (_audio_ctx == nullptr) {
113  cleanup();
114  return;
115  }
116 
117 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
118  avcodec_parameters_to_context(_audio_ctx, codecpar);
119 #else
120  avcodec_copy_context(_audio_ctx, codecpar);
121 #endif
122 
123  AVDictionary *opts = nullptr;
124  av_dict_set(&opts, "request_sample_fmt", "s16", 0);
125  if (avcodec_open2(_audio_ctx, pAudioCodec, nullptr) < 0) {
126  cleanup();
127  return;
128  }
129 
130  av_dict_free(&opts);
131 
132  // Set up the resample context if necessary.
133  if (_audio_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
134 #ifdef HAVE_SWRESAMPLE
135  ffmpeg_cat.debug()
136  << "Codec does not use signed 16-bit sample format. Setting up swresample context.\n";
137 
138  _resample_ctx = swr_alloc();
139  av_opt_set_int(_resample_ctx, "in_channel_count", _audio_channels, 0);
140  av_opt_set_int(_resample_ctx, "out_channel_count", _audio_channels, 0);
141  av_opt_set_int(_resample_ctx, "in_channel_layout", _audio_ctx->channel_layout, 0);
142  av_opt_set_int(_resample_ctx, "out_channel_layout", _audio_ctx->channel_layout, 0);
143  av_opt_set_int(_resample_ctx, "in_sample_rate", _audio_ctx->sample_rate, 0);
144  av_opt_set_int(_resample_ctx, "out_sample_rate", _audio_ctx->sample_rate, 0);
145  av_opt_set_sample_fmt(_resample_ctx, "in_sample_fmt", _audio_ctx->sample_fmt, 0);
146  av_opt_set_sample_fmt(_resample_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
147 
148  if (swr_init(_resample_ctx) != 0) {
149  ffmpeg_cat.error()
150  << "Failed to set up resample context.\n";
151  _resample_ctx = nullptr;
152  }
153 #else
154  ffmpeg_cat.error()
155  << "Codec does not use signed 16-bit sample format, but support for libswresample has not been enabled.\n";
156 #endif
157  }
158 
159  _length = (_format_ctx->duration * 1.0) / AV_TIME_BASE;
160  _can_seek = true;
161  _can_seek_fast = true;
162 
163 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
164  _frame = av_frame_alloc();
165 #else
166  _frame = avcodec_alloc_frame();
167 #endif
168 
169 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
170  _packet = av_packet_alloc();
171 #else
172  _packet = new AVPacket;
173 #endif
174 
175  _buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / 2;
176  _buffer_alloc = new int16_t[_buffer_size + 64];
177 
178  // Allocate enough space for 1024 samples per channel.
179  if ((_packet == nullptr)||(_buffer_alloc == nullptr)) {
180  cleanup();
181  return;
182  }
183  memset(_packet, 0, sizeof(AVPacket));
184 
185  // Align the buffer to a 64-byte boundary The ffmpeg codec likes this,
186  // because it uses SSESSE2.
187  _buffer = _buffer_alloc;
188  while (((size_t)_buffer) & 31) {
189  _buffer += 1;
190  }
191 
192  fetch_packet();
193  _initial_dts = _packet->dts;
194  _last_seek = 0;
195  _samples_read = 0;
196  _buffer_head = 0;
197  _buffer_tail = 0;
198 }
199 
200 /**
201  * xxx
202  */
205  cleanup();
206 }
207 
208 /**
209  * Reset to a standard inactive state.
210  */
211 void FfmpegAudioCursor::
212 cleanup() {
213  if (_audio_ctx && _audio_ctx->codec) {
214 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
215  // We need to drain the codec to prevent a memory leak.
216  avcodec_send_packet(_audio_ctx, nullptr);
217  while (avcodec_receive_frame(_audio_ctx, _frame) == 0) {}
218  avcodec_flush_buffers(_audio_ctx);
219 #endif
220 
221  avcodec_close(_audio_ctx);
222 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 52, 0)
223  avcodec_free_context(&_audio_ctx);
224 #else
225  delete _audio_ctx;
226 #endif
227  }
228  _audio_ctx = nullptr;
229 
230  if (_frame) {
231 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
232  av_frame_free(&_frame);
233 #else
234  avcodec_free_frame(&_frame);
235 #endif
236  _frame = nullptr;
237  }
238 
239  if (_packet) {
240 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
241  av_packet_free(&_packet);
242 #else
243  if (_packet->data) {
244  av_free_packet(_packet);
245  }
246  delete _packet;
247  _packet = nullptr;
248 #endif
249  }
250 
251  if (_buffer_alloc) {
252  delete[] _buffer_alloc;
253  _buffer_alloc = nullptr;
254  _buffer = nullptr;
255  }
256 
257  if (_format_ctx) {
258  _ffvfile.close();
259  _format_ctx = nullptr;
260  }
261 
262 #ifdef HAVE_SWRESAMPLE
263  if (_resample_ctx) {
264  swr_free(&_resample_ctx);
265  _resample_ctx = nullptr;
266  }
267 #endif
268 
269  _audio_index = -1;
270 }
271 
272 /**
273  * Fetches an audio packet and stores it in the packet buffer. Also sets
274  * packet_size and packet_data.
275  */
276 void FfmpegAudioCursor::
277 fetch_packet() {
278  if (_packet->data) {
279 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
280  av_packet_unref(_packet);
281 #else
282  av_free_packet(_packet);
283 #endif
284  }
285  while (av_read_frame(_format_ctx, _packet) >= 0) {
286  if (_packet->stream_index == _audio_index) {
287  _packet_size = _packet->size;
288  _packet_data = _packet->data;
289  return;
290  }
291 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
292  av_packet_unref(_packet);
293 #else
294  av_free_packet(_packet);
295 #endif
296  }
297  _packet->data = nullptr;
298  _packet_size = 0;
299  _packet_data = nullptr;
300 }
301 
302 /**
303  * Reloads the audio buffer by decoding audio packets until one of those audio
304  * packets finally yields some samples. If we encounter the end of the
305  * stream, we synthesize silence.
306  */
307 bool FfmpegAudioCursor::
308 reload_buffer() {
309 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
310  // lavc >= 57.37.100 deprecates the old (avcodec_decode_audio*) API in favor
311  // of a newer, asynchronous API. This is great for our purposes - it gives
312  // the codec the opportunity to decode in the background (e.g. in another
313  // thread or on a dedicated hardware coprocessor).
314 
315  // First, let's fill the codec's input buffer with as many packets as it'll
316  // take:
317  int ret = 0;
318  while (_packet->data != nullptr) {
319  ret = avcodec_send_packet(_audio_ctx, _packet);
320 
321  if (ret != 0) {
322  // Nonzero return code is an error.
323  break;
324  }
325 
326  // If we got here, the codec took the packet! Fetch another one.
327  fetch_packet();
328  if (_packet->data == nullptr) {
329  // fetch_packet() says we're out of packets. Let the codec know.
330  ret = avcodec_send_packet(_audio_ctx, nullptr);
331  }
332  }
333 
334  // Expected ret codes are 0 (we ran out of packets) and EAGAIN (codec full)
335  if ((ret != 0) && (ret != AVERROR(EAGAIN))) {
336  // Some odd error happened. We can't proceed.
337  ffmpeg_cat.error()
338  << "avcodec_send_packet returned " << ret << "\n";
339  return false;
340  }
341 
342  // Now we retrieve our frame!
343  ret = avcodec_receive_frame(_audio_ctx, _frame);
344 
345  if (ret == AVERROR_EOF) {
346  // The only way for this to happen is if we're out of packets.
347  nassertr(_packet->data == nullptr, false);
348 
349  // Synthesize silence:
350  _buffer_head = 0;
351  _buffer_tail = _buffer_size;
352  memset(_buffer, 0, _buffer_size * 2);
353  return true;
354 
355  } else if (ret != 0) {
356  // Some odd error happened. We can't proceed.
357  ffmpeg_cat.error()
358  << "avcodec_receive_frame returned " << ret << "\n";
359  return false;
360  }
361 
362  // We now have _frame. It will be handled below.
363 
364 #else
365  int got_frame = 0;
366  while (!got_frame) {
367  // If we're out of packets, generate silence.
368  if (_packet->data == nullptr) {
369  _buffer_head = 0;
370  _buffer_tail = _buffer_size;
371  memset(_buffer, 0, _buffer_size * 2);
372  return true;
373  } else if (_packet_size == 0) {
374  fetch_packet();
375  }
376 
377  AVPacket *pkt;
378 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
379  pkt = av_packet_alloc();
380 #else
381  AVPacket _pkt;
382  pkt = &_pkt;
383  av_init_packet(pkt);
384 #endif
385  pkt->data = _packet_data;
386  pkt->size = _packet_size;
387 
388  int len = avcodec_decode_audio4(_audio_ctx, _frame, &got_frame, pkt);
389  movies_debug("avcodec_decode_audio4 returned " << len);
390 
391 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
392  av_packet_free(&pkt);
393 #else
394  av_free_packet(pkt);
395 #endif
396 
397  if (len < 0) {
398  return false;
399  } else if (len == 0) {
400  return true;
401  }
402  _packet_data += len;
403  _packet_size -= len;
404  }
405 #endif
406 
407  int bufsize;
408 #ifdef HAVE_SWRESAMPLE
409  if (_resample_ctx) {
410  // Resample the data to signed 16-bit sample format.
411  bufsize = swr_convert(_resample_ctx, (uint8_t **)&_buffer, _buffer_size / 2, (const uint8_t**)_frame->extended_data, _frame->nb_samples);
412  bufsize *= _audio_channels * 2;
413  } else
414 #endif
415  {
416  bufsize = _frame->linesize[0];
417  memcpy(_buffer, _frame->data[0], bufsize);
418  }
419 #if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(52, 19, 100)
420  av_frame_unref(_frame);
421 #endif
422 
423  if (bufsize > 0) {
424  _buffer_head = 0;
425  _buffer_tail = (bufsize/2);
426  return true;
427  }
428  return true;
429 }
430 
431 /**
432  * Seeks to a target location. Afterward, the packet_time is guaranteed to be
433  * less than or equal to the specified time.
434  */
436 seek(double t) {
437  int64_t target_ts = (int64_t)(t / _audio_timebase);
438  if (target_ts < (int64_t)(_initial_dts)) {
439  // Attempts to seek before the first packet will fail.
440  target_ts = _initial_dts;
441  }
442  if (av_seek_frame(_format_ctx, _audio_index, target_ts, AVSEEK_FLAG_BACKWARD) < 0) {
443  ffmpeg_cat.error() << "Seek failure. Shutting down movie.\n";
444  cleanup();
445  return;
446  }
447  avcodec_flush_buffers(_audio_ctx);
448  _buffer_head = 0;
449  _buffer_tail = 0;
450  fetch_packet();
451  double ts = _packet->dts * _audio_timebase;
452  if (t > ts) {
453  int skip = (int)((t-ts) * _audio_rate);
454  read_samples(skip, nullptr);
455  }
456  _last_seek = t;
457  _samples_read = 0;
458 }
459 
460 /**
461  * Read audio samples from the stream. N is the number of samples you wish to
462  * read. Your buffer must be equal in size to N * channels. Multiple-channel
463  * audio will be interleaved.
464  */
466 read_samples(int n, int16_t *data) {
467  int desired = n * _audio_channels;
468 
469  while (desired > 0) {
470  if (_buffer_head == _buffer_tail) {
471  if(!reload_buffer()){
472  break;
473  }
474  movies_debug("read_samples() desired samples: " << desired << " N:" << n);
475  }
476  int available = _buffer_tail - _buffer_head;
477  int ncopy = (desired > available) ? available : desired;
478  if (ncopy) {
479  if (data != nullptr) {
480  memcpy(data, _buffer + _buffer_head, ncopy * 2);
481  data += ncopy;
482  }
483  desired -= ncopy;
484  _buffer_head += ncopy;
485  }
486 
487  }
488  _samples_read += n;
489 }
virtual void read_samples(int n, int16_t *data)
Read audio samples from the stream.
virtual void seek(double offset)
Seeks to a target location.
bool open_vfs(const Filename &filename)
Opens the movie file via Panda's VFS.
A stream that generates a sequence of audio samples.
Definition: ffmpegAudio.h:25
void close()
Explicitly closes the opened file.
virtual ~FfmpegAudioCursor()
xxx
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A MovieAudio is actually any source that provides a sequence of audio samples.
FfmpegAudioCursor(FfmpegAudio *src)
xxx
AVFormatContext * get_format_context() const
Returns a pointer to the opened ffmpeg context, or NULL if the file was not successfully opened.
TypeHandle is the identifier used to differentiate C++ class types.
Definition: typeHandle.h:81
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.