Panda3D
Loading...
Searching...
No Matches
ffmpegAudioCursor.cxx
Go to the documentation of this file.
1/**
2 * PANDA 3D SOFTWARE
3 * Copyright (c) Carnegie Mellon University. All rights reserved.
4 *
5 * All use of this software is subject to the terms of the revised BSD
6 * license. You should have received a copy of this license along
7 * with this source code in a file named "LICENSE."
8 *
9 * @file ffmpegAudioCursor.cxx
10 * @author jyelon
11 * @date 2007-08-01
12 */
13
14#include "config_ffmpeg.h"
15#include "ffmpegAudioCursor.h"
16
17#include "ffmpegAudio.h"
18extern "C" {
19 #include <libavutil/dict.h>
20 #include <libavutil/opt.h>
21 #include <libavcodec/avcodec.h>
22 #include <libavformat/avformat.h>
23}
24
25#ifdef HAVE_SWRESAMPLE
26extern "C" {
27 #include <libswresample/swresample.h>
28}
29#endif
30
31TypeHandle FfmpegAudioCursor::_type_handle;
32
33#if LIBAVFORMAT_VERSION_MAJOR < 53
34 #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
35#endif
36
37#ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE
38// More recent versions of ffmpeg no longer define this.
39#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
40#endif
41
42/**
43 * Opens the source's file, finds its first audio stream and sets up decoding.
44 */
48 _filename(src->_filename),
49 _packet(nullptr),
50 _packet_data(nullptr),
51 _format_ctx(nullptr),
52 _audio_ctx(nullptr),
53 _resample_ctx(nullptr),
54 _buffer(nullptr),
55 _buffer_alloc(nullptr),
56 _frame(nullptr)
57{
58 if (!_ffvfile.open_vfs(_filename)) {
59 cleanup();
60 return;
61 }
62
63 _format_ctx = _ffvfile.get_format_context();
64 nassertv(_format_ctx != nullptr);
65
66 if (avformat_find_stream_info(_format_ctx, nullptr) < 0) {
67 cleanup();
68 return;
69 }
70
71 // As of libavformat version 57.41.100, AVStream.codec is deprecated in favor
72 // of AVStream.codecpar. Fortunately, the two structures have
73 // similarly-named members, so we can just switch out the declaration.
74#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
75 AVCodecParameters *codecpar;
76#else
77 AVCodecContext *codecpar;
78#endif
79
80 // Find the audio stream
81 AVStream *stream = nullptr;
82 for (int i = 0; i < (int)_format_ctx->nb_streams; i++) {
83#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
84 codecpar = _format_ctx->streams[i]->codecpar;
85#else
86 codecpar = _format_ctx->streams[i]->codec;
87#endif
88 if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
89 _audio_index = i;
90 stream = _format_ctx->streams[i];
91 break;
92 }
93 }
94
95 if (stream == nullptr) {
96 cleanup();
97 return;
98 }
99
100 _audio_timebase = av_q2d(stream->time_base);
101 _audio_rate = codecpar->sample_rate;
102
103 // As of libavformat version 60.25.100, the deprecated
104 // AVCodecParameters.channels has been removed.
105 // AVCodecParameters.ch_layout is available since version 59.18.101.
106#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 18, 101)
107 _audio_channels = codecpar->ch_layout.nb_channels;
108#else
109 _audio_channels = codecpar->channels;
110#endif /* LIBAVFORMAT_VERSION_INT */
111
112 const AVCodec *pAudioCodec = avcodec_find_decoder(codecpar->codec_id);
113 if (pAudioCodec == nullptr) {
114 cleanup();
115 return;
116 }
117
118 _audio_ctx = avcodec_alloc_context3(pAudioCodec);
119
120 if (_audio_ctx == nullptr) {
121 cleanup();
122 return;
123 }
124
125#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
126 avcodec_parameters_to_context(_audio_ctx, codecpar);
127#else
128 avcodec_copy_context(_audio_ctx, codecpar);
129#endif
130
131 AVDictionary *opts = nullptr;
132 av_dict_set(&opts, "request_sample_fmt", "s16", 0);
133 if (avcodec_open2(_audio_ctx, pAudioCodec, nullptr) < 0) {
134 cleanup();
135 return;
136 }
137
138 av_dict_free(&opts);
139
140 // Set up the resample context if necessary.
141 if (_audio_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
142#ifdef HAVE_SWRESAMPLE
143 if (ffmpeg_cat.is_debug()) {
144 ffmpeg_cat.debug()
145 << "Codec does not use signed 16-bit sample format. Setting up swresample context.\n";
146 }
147
148 _resample_ctx = swr_alloc();
149
150 // As of libavformat version 60.25.100, the deprecated
151 // AVCodecContext.channel_layout has been removed.
152 // AVCodecContext.ch_layout is available since version 59.18.101.
153#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 18, 101)
154 av_opt_set_chlayout(_resample_ctx, "in_chlayout", &_audio_ctx->ch_layout, 0);
155 av_opt_set_chlayout(_resample_ctx, "out_chlayout", &_audio_ctx->ch_layout, 0);
156#else
157 av_opt_set_int(_resample_ctx, "in_channel_count", _audio_channels, 0);
158 av_opt_set_int(_resample_ctx, "out_channel_count", _audio_channels, 0);
159 av_opt_set_int(_resample_ctx, "in_channel_layout", _audio_ctx->channel_layout, 0);
160 av_opt_set_int(_resample_ctx, "out_channel_layout", _audio_ctx->channel_layout, 0);
161#endif /* LIBAVFORMAT_VERSION_INT */
162
163 av_opt_set_int(_resample_ctx, "in_sample_rate", _audio_ctx->sample_rate, 0);
164 av_opt_set_int(_resample_ctx, "out_sample_rate", _audio_ctx->sample_rate, 0);
165 av_opt_set_sample_fmt(_resample_ctx, "in_sample_fmt", _audio_ctx->sample_fmt, 0);
166 av_opt_set_sample_fmt(_resample_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
167
168 if (swr_init(_resample_ctx) != 0) {
169 ffmpeg_cat.error()
170 << "Failed to set up resample context.\n";
171 _resample_ctx = nullptr;
172 }
173#else
174 ffmpeg_cat.error()
175 << "Codec does not use signed 16-bit sample format, but support for libswresample has not been enabled.\n";
176#endif /* HAVE_SWRESAMPLE */
177 }
178
179 _length = (_format_ctx->duration * 1.0) / AV_TIME_BASE;
180 _can_seek = true;
181 _can_seek_fast = true;
182
183#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
184 _frame = av_frame_alloc();
185#else
186 _frame = avcodec_alloc_frame();
187#endif
188
189#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
190 _packet = av_packet_alloc();
191#else
192 _packet = new AVPacket;
193#endif
194
195 _buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / 2;
196 _buffer_alloc = new int16_t[_buffer_size + 64];
197
198 // Allocate enough space for 1024 samples per channel.
199 if ((_packet == nullptr)||(_buffer_alloc == nullptr)) {
200 cleanup();
201 return;
202 }
203 memset(_packet, 0, sizeof(AVPacket));
204
205 // Align the buffer to a 32-byte boundary (the mask below is 31). The ffmpeg
206 // codec likes this, because it uses SSE/SSE2.
207 _buffer = _buffer_alloc;
208 while (((size_t)_buffer) & 31) {
209 _buffer += 1;
210 }
211
212 fetch_packet();
213 _initial_dts = _packet->dts;
214 _last_seek = 0;
215 _samples_read = 0;
216 _buffer_head = 0;
217 _buffer_tail = 0;
218}
219
220/**
221 * Releases everything the cursor holds by calling cleanup().
222 */
225 cleanup();
226}
227
228/**
229 * Reset to a standard inactive state.
230 */
231void FfmpegAudioCursor::
232cleanup() {
233 if (_audio_ctx && _audio_ctx->codec) {
234#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
235 // We need to drain the codec to prevent a memory leak.
236 avcodec_send_packet(_audio_ctx, nullptr);
237 while (avcodec_receive_frame(_audio_ctx, _frame) == 0) {}
238 avcodec_flush_buffers(_audio_ctx);
239#endif
240
241#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 52, 0)
242 avcodec_free_context(&_audio_ctx);
243#else
244 avcodec_close(_audio_ctx);
245 av_free(_audio_ctx);
246#endif
247 }
248 _audio_ctx = nullptr;
249
250 if (_frame) {
251#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
252 av_frame_free(&_frame);
253#else
254 avcodec_free_frame(&_frame);
255#endif
256 _frame = nullptr;
257 }
258
259 if (_packet) {
260#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
261 av_packet_free(&_packet);
262#else
263 if (_packet->data) {
264 av_free_packet(_packet);
265 }
266 delete _packet;
267 _packet = nullptr;
268#endif
269 }
270
271 if (_buffer_alloc) {
272 delete[] _buffer_alloc;
273 _buffer_alloc = nullptr;
274 _buffer = nullptr;
275 }
276
277 if (_format_ctx) {
278 _ffvfile.close();
279 _format_ctx = nullptr;
280 }
281
282#ifdef HAVE_SWRESAMPLE
283 if (_resample_ctx) {
284 swr_free(&_resample_ctx);
285 _resample_ctx = nullptr;
286 }
287#endif
288
289 _audio_index = -1;
290}
291
292/**
293 * Fetches an audio packet and stores it in the packet buffer. Also sets
294 * packet_size and packet_data.
295 */
296void FfmpegAudioCursor::
297fetch_packet() {
298 if (_packet->data) {
299#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
300 av_packet_unref(_packet);
301#else
302 av_free_packet(_packet);
303#endif
304 }
305 while (av_read_frame(_format_ctx, _packet) >= 0) {
306 if (_packet->stream_index == _audio_index) {
307 _packet_size = _packet->size;
308 _packet_data = _packet->data;
309 return;
310 }
311#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
312 av_packet_unref(_packet);
313#else
314 av_free_packet(_packet);
315#endif
316 }
317 _packet->data = nullptr;
318 _packet_size = 0;
319 _packet_data = nullptr;
320}
321
322/**
323 * Reloads the audio buffer by decoding audio packets until one of those audio
324 * packets finally yields some samples. If we encounter the end of the
325 * stream, we synthesize silence.
326 */
327bool FfmpegAudioCursor::
328reload_buffer() {
329#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
330 // lavc >= 57.37.100 deprecates the old (avcodec_decode_audio*) API in favor
331 // of a newer, asynchronous API. This is great for our purposes - it gives
332 // the codec the opportunity to decode in the background (e.g. in another
333 // thread or on a dedicated hardware coprocessor).
334
335 // First, let's fill the codec's input buffer with as many packets as it'll
336 // take:
337 int ret = 0;
338 while (_packet->data != nullptr) {
339 ret = avcodec_send_packet(_audio_ctx, _packet);
340
341 if (ret != 0) {
342 // Nonzero return code is an error.
343 break;
344 }
345
346 // If we got here, the codec took the packet! Fetch another one.
347 fetch_packet();
348 if (_packet->data == nullptr) {
349 // fetch_packet() says we're out of packets. Let the codec know.
350 ret = avcodec_send_packet(_audio_ctx, nullptr);
351 }
352 }
353
354 // Expected ret codes are 0 (we ran out of packets) and EAGAIN (codec full)
355 if ((ret != 0) && (ret != AVERROR(EAGAIN))) {
356 // Some odd error happened. We can't proceed.
357 ffmpeg_cat.error()
358 << "avcodec_send_packet returned " << ret << "\n";
359 return false;
360 }
361
362 // Now we retrieve our frame!
363 ret = avcodec_receive_frame(_audio_ctx, _frame);
364
365 if (ret == AVERROR_EOF) {
366 // The only way for this to happen is if we're out of packets.
367 nassertr(_packet->data == nullptr, false);
368
369 // Synthesize silence:
370 _buffer_head = 0;
371 _buffer_tail = _buffer_size;
372 memset(_buffer, 0, _buffer_size * 2);
373 return true;
374
375 } else if (ret != 0) {
376 // Some odd error happened. We can't proceed.
377 ffmpeg_cat.error()
378 << "avcodec_receive_frame returned " << ret << "\n";
379 return false;
380 }
381
382 // We now have _frame. It will be handled below.
383
384#else
385 int got_frame = 0;
386 while (!got_frame) {
387 // If we're out of packets, generate silence.
388 if (_packet->data == nullptr) {
389 _buffer_head = 0;
390 _buffer_tail = _buffer_size;
391 memset(_buffer, 0, _buffer_size * 2);
392 return true;
393 } else if (_packet_size == 0) {
394 fetch_packet();
395 }
396
397 AVPacket *pkt;
398#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
399 pkt = av_packet_alloc();
400#else
401 AVPacket _pkt;
402 pkt = &_pkt;
403 av_init_packet(pkt);
404#endif
405 pkt->data = _packet_data;
406 pkt->size = _packet_size;
407
408 int len = avcodec_decode_audio4(_audio_ctx, _frame, &got_frame, pkt);
409 movies_debug("avcodec_decode_audio4 returned " << len);
410
411#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
412 av_packet_free(&pkt);
413#else
414 av_free_packet(pkt);
415#endif
416
417 if (len < 0) {
418 return false;
419 } else if (len == 0) {
420 return true;
421 }
422 _packet_data += len;
423 _packet_size -= len;
424 }
425#endif
426
427 int bufsize;
428#ifdef HAVE_SWRESAMPLE
429 if (_resample_ctx) {
430 // Resample the data to signed 16-bit sample format.
431 bufsize = swr_convert(_resample_ctx, (uint8_t **)&_buffer, _buffer_size / 2, (const uint8_t**)_frame->extended_data, _frame->nb_samples);
432 bufsize *= _audio_channels * 2;
433 } else
434#endif
435 {
436 bufsize = _frame->linesize[0];
437 memcpy(_buffer, _frame->data[0], bufsize);
438 }
439#if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(52, 19, 100)
440 av_frame_unref(_frame);
441#endif
442
443 if (bufsize > 0) {
444 _buffer_head = 0;
445 _buffer_tail = (bufsize/2);
446 return true;
447 }
448 return true;
449}
450
451/**
452 * Seeks to a target location. Afterward, the packet_time is guaranteed to be
453 * less than or equal to the specified time.
454 */
456seek(double t) {
457 int64_t target_ts = (int64_t)(t / _audio_timebase);
458 if (target_ts < (int64_t)(_initial_dts)) {
459 // Attempts to seek before the first packet will fail.
460 target_ts = _initial_dts;
461 }
462 if (av_seek_frame(_format_ctx, _audio_index, target_ts, AVSEEK_FLAG_BACKWARD) < 0) {
463 ffmpeg_cat.error() << "Seek failure. Shutting down movie.\n";
464 cleanup();
465 return;
466 }
467 avcodec_flush_buffers(_audio_ctx);
468 _buffer_head = 0;
469 _buffer_tail = 0;
470 fetch_packet();
471 double ts = _packet->dts * _audio_timebase;
472 if (t > ts) {
473 int skip = (int)((t-ts) * _audio_rate);
474 read_samples(skip, nullptr);
475 }
476 _last_seek = t;
477 _samples_read = 0;
478}
479
480/**
481 * Read audio samples from the stream. N is the number of samples you wish to
482 * read. Your buffer must be equal in size to N * channels. Multiple-channel
483 * audio will be interleaved.
484 */
486read_samples(int n, int16_t *data) {
487 int desired = n * _audio_channels;
488
489 while (desired > 0) {
490 if (_buffer_head == _buffer_tail) {
491 if(!reload_buffer()){
492 break;
493 }
494 movies_debug("read_samples() desired samples: " << desired << " N:" << n);
495 }
496 int available = _buffer_tail - _buffer_head;
497 int ncopy = (desired > available) ? available : desired;
498 if (ncopy) {
499 if (data != nullptr) {
500 memcpy(data, _buffer + _buffer_head, ncopy * 2);
501 data += ncopy;
502 }
503 desired -= ncopy;
504 _buffer_head += ncopy;
505 }
506
507 }
508 _samples_read += n;
509}
virtual void seek(double offset)
Seeks to a target location.
FfmpegAudioCursor(FfmpegAudio *src)
xxx
virtual void read_samples(int n, int16_t *data)
Read audio samples from the stream.
virtual ~FfmpegAudioCursor()
xxx
A stream that generates a sequence of audio samples.
Definition ffmpegAudio.h:25
bool open_vfs(const Filename &filename)
Opens the movie file via Panda's VFS.
AVFormatContext * get_format_context() const
Returns a pointer to the opened ffmpeg context, or NULL if the file was not successfully opened.
void close()
Explicitly closes the opened file.
A MovieAudio is actually any source that provides a sequence of audio samples.
TypeHandle is the identifier used to differentiate C++ class types.
Definition typeHandle.h:81
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.