Panda3D
Loading...
Searching...
No Matches
ffmpegAudioCursor.cxx
Go to the documentation of this file.
1/**
2 * PANDA 3D SOFTWARE
3 * Copyright (c) Carnegie Mellon University. All rights reserved.
4 *
5 * All use of this software is subject to the terms of the revised BSD
6 * license. You should have received a copy of this license along
7 * with this source code in a file named "LICENSE."
8 *
9 * @file ffmpegAudioCursor.cxx
10 * @author jyelon
11 * @date 2007-08-01
12 */
13
14#include "config_ffmpeg.h"
15#include "ffmpegAudioCursor.h"
16
17#include "ffmpegAudio.h"
18extern "C" {
19 #include <libavutil/dict.h>
20 #include <libavutil/opt.h>
21 #include <libavcodec/avcodec.h>
22 #include <libavformat/avformat.h>
23}
24
25#ifdef HAVE_SWRESAMPLE
26extern "C" {
27 #include <libswresample/swresample.h>
28}
29#endif
30
31TypeHandle FfmpegAudioCursor::_type_handle;
32
33#if LIBAVFORMAT_VERSION_MAJOR < 53
34 #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
35#endif
36
37#ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE
38// More recent versions of ffmpeg no longer define this.
39#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
40#endif
41
42/**
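43 * Opens the source's file through the VFS, finds its first audio stream and sets up a decoder for it; on any failure the cursor is reset via cleanup().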
44 */
48 _filename(src->_filename),
49 _packet(nullptr),
50 _packet_data(nullptr),
51 _format_ctx(nullptr),
52 _audio_ctx(nullptr),
53 _resample_ctx(nullptr),
54 _buffer(nullptr),
55 _buffer_alloc(nullptr),
56 _frame(nullptr)
57{
58 if (!_ffvfile.open_vfs(_filename)) {
59 cleanup();
60 return;
61 }
62
63 _format_ctx = _ffvfile.get_format_context();
64 nassertv(_format_ctx != nullptr);
65
66 if (avformat_find_stream_info(_format_ctx, nullptr) < 0) {
67 cleanup();
68 return;
69 }
70
71 // As of libavformat version 57.41.100, AVStream.codec is deprecated in favor
72 // of AVStream.codecpar. Fortunately, the two structures have
73 // similarly-named members, so we can just switch out the declaration.
74#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
75 AVCodecParameters *codecpar;
76#else
77 AVCodecContext *codecpar;
78#endif
79
80 // Find the audio stream
81 AVStream *stream = nullptr;
82 for (int i = 0; i < (int)_format_ctx->nb_streams; i++) {
83#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
84 codecpar = _format_ctx->streams[i]->codecpar;
85#else
86 codecpar = _format_ctx->streams[i]->codec;
87#endif
88 if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
89 _audio_index = i;
90 stream = _format_ctx->streams[i];
91 break;
92 }
93 }
94
95 if (stream == nullptr) {
96 cleanup();
97 return;
98 }
99
100 _audio_timebase = av_q2d(stream->time_base);
101 _audio_rate = codecpar->sample_rate;
102 _audio_channels = codecpar->channels;
103
104 const AVCodec *pAudioCodec = avcodec_find_decoder(codecpar->codec_id);
105 if (pAudioCodec == nullptr) {
106 cleanup();
107 return;
108 }
109
110 _audio_ctx = avcodec_alloc_context3(pAudioCodec);
111
112 if (_audio_ctx == nullptr) {
113 cleanup();
114 return;
115 }
116
117#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 41, 100)
118 avcodec_parameters_to_context(_audio_ctx, codecpar);
119#else
120 avcodec_copy_context(_audio_ctx, codecpar);
121#endif
122
123 AVDictionary *opts = nullptr;
124 av_dict_set(&opts, "request_sample_fmt", "s16", 0);
125 if (avcodec_open2(_audio_ctx, pAudioCodec, nullptr) < 0) {
126 cleanup();
127 return;
128 }
129
130 av_dict_free(&opts);
131
132 // Set up the resample context if necessary.
133 if (_audio_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
134#ifdef HAVE_SWRESAMPLE
135 if (ffmpeg_cat.is_debug()) {
136 ffmpeg_cat.debug()
137 << "Codec does not use signed 16-bit sample format. Setting up swresample context.\n";
138 }
139
140 _resample_ctx = swr_alloc();
141 av_opt_set_int(_resample_ctx, "in_channel_count", _audio_channels, 0);
142 av_opt_set_int(_resample_ctx, "out_channel_count", _audio_channels, 0);
143 av_opt_set_int(_resample_ctx, "in_channel_layout", _audio_ctx->channel_layout, 0);
144 av_opt_set_int(_resample_ctx, "out_channel_layout", _audio_ctx->channel_layout, 0);
145 av_opt_set_int(_resample_ctx, "in_sample_rate", _audio_ctx->sample_rate, 0);
146 av_opt_set_int(_resample_ctx, "out_sample_rate", _audio_ctx->sample_rate, 0);
147 av_opt_set_sample_fmt(_resample_ctx, "in_sample_fmt", _audio_ctx->sample_fmt, 0);
148 av_opt_set_sample_fmt(_resample_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
149
150 if (swr_init(_resample_ctx) != 0) {
151 ffmpeg_cat.error()
152 << "Failed to set up resample context.\n";
153 _resample_ctx = nullptr;
154 }
155#else
156 ffmpeg_cat.error()
157 << "Codec does not use signed 16-bit sample format, but support for libswresample has not been enabled.\n";
158#endif
159 }
160
161 _length = (_format_ctx->duration * 1.0) / AV_TIME_BASE;
162 _can_seek = true;
163 _can_seek_fast = true;
164
165#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
166 _frame = av_frame_alloc();
167#else
168 _frame = avcodec_alloc_frame();
169#endif
170
171#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
172 _packet = av_packet_alloc();
173#else
174 _packet = new AVPacket;
175#endif
176
177 _buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / 2;
178 _buffer_alloc = new int16_t[_buffer_size + 64];
179
180 // Allocate enough space for 1024 samples per channel.
181 if ((_packet == nullptr)||(_buffer_alloc == nullptr)) {
182 cleanup();
183 return;
184 }
185 memset(_packet, 0, sizeof(AVPacket));
186
187 // Align the buffer to a 64-byte boundary The ffmpeg codec likes this,
188 // because it uses SSESSE2.
189 _buffer = _buffer_alloc;
190 while (((size_t)_buffer) & 31) {
191 _buffer += 1;
192 }
193
194 fetch_packet();
195 _initial_dts = _packet->dts;
196 _last_seek = 0;
197 _samples_read = 0;
198 _buffer_head = 0;
199 _buffer_tail = 0;
200}
201
202/**
203 * Destructor.  Releases everything the cursor holds by calling cleanup().
204 */
207 cleanup();
208}
209
210/**
211 * Reset to a standard inactive state.
212 */
213void FfmpegAudioCursor::
214cleanup() {
215 if (_audio_ctx && _audio_ctx->codec) {
216#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
217 // We need to drain the codec to prevent a memory leak.
218 avcodec_send_packet(_audio_ctx, nullptr);
219 while (avcodec_receive_frame(_audio_ctx, _frame) == 0) {}
220 avcodec_flush_buffers(_audio_ctx);
221#endif
222
223 avcodec_close(_audio_ctx);
224#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 52, 0)
225 avcodec_free_context(&_audio_ctx);
226#else
227 av_free(_audio_ctx);
228#endif
229 }
230 _audio_ctx = nullptr;
231
232 if (_frame) {
233#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
234 av_frame_free(&_frame);
235#else
236 avcodec_free_frame(&_frame);
237#endif
238 _frame = nullptr;
239 }
240
241 if (_packet) {
242#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
243 av_packet_free(&_packet);
244#else
245 if (_packet->data) {
246 av_free_packet(_packet);
247 }
248 delete _packet;
249 _packet = nullptr;
250#endif
251 }
252
253 if (_buffer_alloc) {
254 delete[] _buffer_alloc;
255 _buffer_alloc = nullptr;
256 _buffer = nullptr;
257 }
258
259 if (_format_ctx) {
260 _ffvfile.close();
261 _format_ctx = nullptr;
262 }
263
264#ifdef HAVE_SWRESAMPLE
265 if (_resample_ctx) {
266 swr_free(&_resample_ctx);
267 _resample_ctx = nullptr;
268 }
269#endif
270
271 _audio_index = -1;
272}
273
274/**
275 * Fetches an audio packet and stores it in the packet buffer. Also sets
276 * packet_size and packet_data.
277 */
278void FfmpegAudioCursor::
279fetch_packet() {
280 if (_packet->data) {
281#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
282 av_packet_unref(_packet);
283#else
284 av_free_packet(_packet);
285#endif
286 }
287 while (av_read_frame(_format_ctx, _packet) >= 0) {
288 if (_packet->stream_index == _audio_index) {
289 _packet_size = _packet->size;
290 _packet_data = _packet->data;
291 return;
292 }
293#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
294 av_packet_unref(_packet);
295#else
296 av_free_packet(_packet);
297#endif
298 }
299 _packet->data = nullptr;
300 _packet_size = 0;
301 _packet_data = nullptr;
302}
303
304/**
305 * Reloads the audio buffer by decoding audio packets until one of those audio
306 * packets finally yields some samples. If we encounter the end of the
307 * stream, we synthesize silence.
308 */
309bool FfmpegAudioCursor::
310reload_buffer() {
311#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
312 // lavc >= 57.37.100 deprecates the old (avcodec_decode_audio*) API in favor
313 // of a newer, asynchronous API. This is great for our purposes - it gives
314 // the codec the opportunity to decode in the background (e.g. in another
315 // thread or on a dedicated hardware coprocessor).
316
317 // First, let's fill the codec's input buffer with as many packets as it'll
318 // take:
319 int ret = 0;
320 while (_packet->data != nullptr) {
321 ret = avcodec_send_packet(_audio_ctx, _packet);
322
323 if (ret != 0) {
324 // Nonzero return code is an error.
325 break;
326 }
327
328 // If we got here, the codec took the packet! Fetch another one.
329 fetch_packet();
330 if (_packet->data == nullptr) {
331 // fetch_packet() says we're out of packets. Let the codec know.
332 ret = avcodec_send_packet(_audio_ctx, nullptr);
333 }
334 }
335
336 // Expected ret codes are 0 (we ran out of packets) and EAGAIN (codec full)
337 if ((ret != 0) && (ret != AVERROR(EAGAIN))) {
338 // Some odd error happened. We can't proceed.
339 ffmpeg_cat.error()
340 << "avcodec_send_packet returned " << ret << "\n";
341 return false;
342 }
343
344 // Now we retrieve our frame!
345 ret = avcodec_receive_frame(_audio_ctx, _frame);
346
347 if (ret == AVERROR_EOF) {
348 // The only way for this to happen is if we're out of packets.
349 nassertr(_packet->data == nullptr, false);
350
351 // Synthesize silence:
352 _buffer_head = 0;
353 _buffer_tail = _buffer_size;
354 memset(_buffer, 0, _buffer_size * 2);
355 return true;
356
357 } else if (ret != 0) {
358 // Some odd error happened. We can't proceed.
359 ffmpeg_cat.error()
360 << "avcodec_receive_frame returned " << ret << "\n";
361 return false;
362 }
363
364 // We now have _frame. It will be handled below.
365
366#else
367 int got_frame = 0;
368 while (!got_frame) {
369 // If we're out of packets, generate silence.
370 if (_packet->data == nullptr) {
371 _buffer_head = 0;
372 _buffer_tail = _buffer_size;
373 memset(_buffer, 0, _buffer_size * 2);
374 return true;
375 } else if (_packet_size == 0) {
376 fetch_packet();
377 }
378
379 AVPacket *pkt;
380#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
381 pkt = av_packet_alloc();
382#else
383 AVPacket _pkt;
384 pkt = &_pkt;
385 av_init_packet(pkt);
386#endif
387 pkt->data = _packet_data;
388 pkt->size = _packet_size;
389
390 int len = avcodec_decode_audio4(_audio_ctx, _frame, &got_frame, pkt);
391 movies_debug("avcodec_decode_audio4 returned " << len);
392
393#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
394 av_packet_free(&pkt);
395#else
396 av_free_packet(pkt);
397#endif
398
399 if (len < 0) {
400 return false;
401 } else if (len == 0) {
402 return true;
403 }
404 _packet_data += len;
405 _packet_size -= len;
406 }
407#endif
408
409 int bufsize;
410#ifdef HAVE_SWRESAMPLE
411 if (_resample_ctx) {
412 // Resample the data to signed 16-bit sample format.
413 bufsize = swr_convert(_resample_ctx, (uint8_t **)&_buffer, _buffer_size / 2, (const uint8_t**)_frame->extended_data, _frame->nb_samples);
414 bufsize *= _audio_channels * 2;
415 } else
416#endif
417 {
418 bufsize = _frame->linesize[0];
419 memcpy(_buffer, _frame->data[0], bufsize);
420 }
421#if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(52, 19, 100)
422 av_frame_unref(_frame);
423#endif
424
425 if (bufsize > 0) {
426 _buffer_head = 0;
427 _buffer_tail = (bufsize/2);
428 return true;
429 }
430 return true;
431}
432
433/**
434 * Seeks to a target location. Afterward, the packet_time is guaranteed to be
435 * less than or equal to the specified time.
436 */
438seek(double t) {
439 int64_t target_ts = (int64_t)(t / _audio_timebase);
440 if (target_ts < (int64_t)(_initial_dts)) {
441 // Attempts to seek before the first packet will fail.
442 target_ts = _initial_dts;
443 }
444 if (av_seek_frame(_format_ctx, _audio_index, target_ts, AVSEEK_FLAG_BACKWARD) < 0) {
445 ffmpeg_cat.error() << "Seek failure. Shutting down movie.\n";
446 cleanup();
447 return;
448 }
449 avcodec_flush_buffers(_audio_ctx);
450 _buffer_head = 0;
451 _buffer_tail = 0;
452 fetch_packet();
453 double ts = _packet->dts * _audio_timebase;
454 if (t > ts) {
455 int skip = (int)((t-ts) * _audio_rate);
456 read_samples(skip, nullptr);
457 }
458 _last_seek = t;
459 _samples_read = 0;
460}
461
462/**
463 * Read audio samples from the stream. N is the number of samples you wish to
464 * read. Your buffer must be equal in size to N * channels. Multiple-channel
465 * audio will be interleaved.
466 */
468read_samples(int n, int16_t *data) {
469 int desired = n * _audio_channels;
470
471 while (desired > 0) {
472 if (_buffer_head == _buffer_tail) {
473 if(!reload_buffer()){
474 break;
475 }
476 movies_debug("read_samples() desired samples: " << desired << " N:" << n);
477 }
478 int available = _buffer_tail - _buffer_head;
479 int ncopy = (desired > available) ? available : desired;
480 if (ncopy) {
481 if (data != nullptr) {
482 memcpy(data, _buffer + _buffer_head, ncopy * 2);
483 data += ncopy;
484 }
485 desired -= ncopy;
486 _buffer_head += ncopy;
487 }
488
489 }
490 _samples_read += n;
491}
virtual void seek(double offset)
Seeks to a target location.
FfmpegAudioCursor(FfmpegAudio *src)
xxx
virtual void read_samples(int n, int16_t *data)
Read audio samples from the stream.
virtual ~FfmpegAudioCursor()
xxx
A stream that generates a sequence of audio samples.
Definition ffmpegAudio.h:25
bool open_vfs(const Filename &filename)
Opens the movie file via Panda's VFS.
AVFormatContext * get_format_context() const
Returns a pointer to the opened ffmpeg context, or NULL if the file was not successfully opened.
void close()
Explicitly closes the opened file.
A MovieAudio is actually any source that provides a sequence of audio samples.
TypeHandle is the identifier used to differentiate C++ class types.
Definition typeHandle.h:81
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.