11 #include <OpenGL/CGLMacro.h>
19 "title" :
"VuoFfmpegDecoder",
25 "VuoList_VuoAudioSamples",
38 #define AUDIO_DIFF_AVG_NB 20
// Constructs a decoder for the given URL.
// NOTE(review): only part of the body is visible in this view; what is done with `url` is not shown here.
40 VuoFfmpegDecoder::VuoFfmpegDecoder(
VuoUrl url)
// Function-local once-token handed to dispatch_once() below.
42 static dispatch_once_t pred;
43 dispatch_once(&pred, ^
// Presumably the body of the once-block: FFmpeg network initialization — TODO confirm against the full source.
45 avformat_network_init();
59 if(!dec->Initialize())
// Opens mVideoPath with FFmpeg, reads stream information, and then runs
// InitializeVideo(), InitializeAudio() and InitializeVideoInfo() in that order.
// Returns bool; the return statements themselves are not visible in this view.
71 bool VuoFfmpegDecoder::Initialize()
// Start from a clean slate: no format context and no codec contexts.
73 container.formatCtx = NULL;
74 container.videoCodecCtx = NULL;
75 container.audioCodecCtx = NULL;
// Reset playback bookkeeping and the "already warned" flags.
80 lastVideoTimestamp = 0;
82 lastAudioTimestamp = 0;
83 showedTimestampGapWarning =
false;
84 showedSeekIgnoredWarning =
false;
// Ask for TCP as the RTSP transport when opening the input.
// NOTE(review): no av_dict_free(&opts) is visible in this view — confirm leftover entries are released.
88 AVDictionary *opts = 0;
89 av_dict_set(&opts,
"rtsp_transport",
"tcp", 0);
90 int ret = avformat_open_input(&(container.formatCtx), mVideoPath, NULL, &opts);
// On "no such file", the open is retried with `path`, which is computed in lines not shown here.
94 if (ret == AVERROR(ENOENT))
100 ret = avformat_open_input(&(container.formatCtx), path, NULL, &opts);
// Either a non-zero result or a missing context counts as failure to open.
104 if (ret != 0 || !container.formatCtx)
106 VUserLog(
"Error: FFmpeg could not open \"%s\" — %s", mVideoPath, av_err2str(ret));
// Debug-only diagnostics about the opened container and its input format.
110 VDebugLog(
"FFmpeg context flags: 0x%x", container.formatCtx->ctx_flags);
111 if (container.formatCtx->iformat)
112 VDebugLog(
"FFmpeg input format : '%s' (%s) flags=0x%x codec=0x%x",
113 container.formatCtx->iformat->long_name,
114 container.formatCtx->iformat->name,
115 container.formatCtx->iformat->flags,
116 container.formatCtx->iformat->raw_codec_id);
// Probe the streams; the condition guarding the warning below is not visible in this view.
119 ret = avformat_find_stream_info(container.formatCtx, NULL);
121 VUserLog(
"Warning: FFmpeg could not find video stream information in \"%s\" — %s.", mVideoPath, av_err2str(ret));
// A video-initialization failure is reported to the user.
126 if(!InitializeVideo(container))
128 VUserLog(
"Failed initializing video stream.");
// An audio-initialization failure only disables audio.
133 if( !InitializeAudio(container) )
134 audioIsEnabled =
false;
// Decode the first frame(s) to populate timing information.
137 if( !InitializeVideoInfo() )
139 VUserLog(
"Error: FFmpeg failed to decode the first video frame.");
// Sets up video decoding for `container`: selects the video stream, finds a decoder for it,
// allocates and fills a codec context, wires the packet/frame queue destructors, and opens the codec.
// Each failure path logs via VUserLog; the return statements are not visible in this view.
149 bool VuoFfmpegDecoder::InitializeVideo(VuoFfmpegDecoder::AVContainer& container)
// A negative index means no video stream. The index is assigned in lines not shown here.
151 if(container.videoStreamIndex < 0)
153 VUserLog(
"Error: FFmpeg could not find a video stream in file \"%s\".", mVideoPath);
157 container.videoStream = container.formatCtx->streams[container.videoStreamIndex];
// Look up a decoder matching the stream's codec id.
160 AVCodec* videoCodec = avcodec_find_decoder(container.videoStream->codecpar->codec_id);
162 if(videoCodec == NULL)
164 VUserLog(
"Error: FFmpeg could not find a suitable decoder for file \"%s\".", mVideoPath);
168 VDebugLog(
"FFmpeg video codec : '%s' (%s)",
169 videoCodec->long_name,
// Allocate a codec context for the decoder, then copy the stream's codec parameters into it.
172 container.videoCodecCtx = avcodec_alloc_context3(videoCodec);
173 if (!container.videoCodecCtx)
175 VUserLog(
"Error: FFmpeg could not allocate the decoder context for \"%s\".", mVideoPath);
// NOTE(review): this logs "could not find the codec" although the failing call copies parameters.
178 if (avcodec_parameters_to_context(container.videoCodecCtx, container.videoStream->codecpar) < 0)
180 VUserLog(
"Error: FFmpeg could not find the codec for \"%s\".", mVideoPath);
// Destructors assigned to the queues: packets use av_packet_unref, frames use VideoFrame::Delete.
186 videoPackets.destructor = av_packet_unref;
187 videoFrames.destructor = VideoFrame::Delete;
190 lastDecodedVideoPts = 0;
191 lastSentVideoPts = 0;
// FLV1 and GIF are flagged as not seekable.
195 if(videoCodec->id == AV_CODEC_ID_FLV1 || videoCodec->id == AV_CODEC_ID_GIF)
196 container.seekUnavailable =
true;
// Open the codec; failure is logged with the same message as the parameter-copy failure above.
199 if(avcodec_open2(container.videoCodecCtx, videoCodec, NULL) < 0)
201 VUserLog(
"Error: FFmpeg could not find the codec for \"%s\".", mVideoPath);
// Sets up audio decoding for `container`: finds and opens a decoder for the audio stream,
// configures a libswresample context that converts to planar doubles (AV_SAMPLE_FMT_DBLP),
// and prepares the audio buffer and packet state.
// Several failure paths set container.audioStreamIndex to -1. The return statements are not
// visible in this view.
208 bool VuoFfmpegDecoder::InitializeAudio(VuoFfmpegDecoder::AVContainer& container)
// A negative index means there is no audio stream to set up.
210 if(container.audioStreamIndex < 0)
// Destructors assigned to the queues: packets use av_packet_unref, frames use AudioFrame::Delete.
216 audioPackets.destructor = av_packet_unref;
217 audioFrames.destructor = AudioFrame::Delete;
220 AVCodec *audioCodec = NULL;
222 container.audioStream = container.formatCtx->streams[container.audioStreamIndex];
// audioCodec may be NULL here; it is only checked further down, together with avcodec_open2().
225 audioCodec = avcodec_find_decoder(container.audioStream->codecpar->codec_id);
227 container.audioCodecCtx = avcodec_alloc_context3(audioCodec);
228 if (!container.audioCodecCtx)
230 VUserLog(
"Error: FFmpeg could not allocate the decoder context for \"%s\".", mVideoPath);
// NOTE(review): this logs "could not find the codec" although the failing call copies parameters.
233 if (avcodec_parameters_to_context(container.audioCodecCtx, container.audioStream->codecpar) < 0)
235 VUserLog(
"Error: FFmpeg could not find the codec for \"%s\".", mVideoPath);
// NOTE(review): when audioCodec is NULL the short-circuit skips the assignment to `ret`, so the
// logged error string uses whatever `ret` held before (its declaration is not visible here).
240 if (audioCodec == NULL || (ret = avcodec_open2(container.audioCodecCtx, audioCodec, NULL)) < 0)
242 VUserLog(
"Error: Unsupported audio codec %s: %s", avcodec_get_name(container.audioCodecCtx->codec_id), av_err2str(ret));
249 VDebugLog(
"FFmpeg audio codec : '%s' (%s)",
250 audioCodec->long_name,
// Resampler used to convert decoded audio to the destination sample format.
253 container.swr_ctx = swr_alloc();
255 if (!container.swr_ctx)
257 VUserLog(
"Error: FFmpeg could not allocate resampler context.");
258 container.audioStreamIndex = -1;
// The channel layout is a 64-bit mask and av_opt_set_int() takes an int64_t, so it is held in an
// int64_t; a plain int would truncate or sign-flip layouts that use bit 31 or above.
265 int64_t src_ch_layout = container.audioCodecCtx->channel_layout;
266 int src_rate = container.audioCodecCtx->sample_rate;
267 audio_channels = container.audioCodecCtx->channels;
268 AVSampleFormat src_sample_fmt = container.audioCodecCtx->sample_fmt;
271 AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_DBLP;
// Input side of the resampler mirrors the decoder's output.
273 av_opt_set_int(container.swr_ctx,
"in_channel_layout", src_ch_layout, 0);
274 av_opt_set_int(container.swr_ctx,
"in_sample_rate", src_rate, 0);
275 av_opt_set_sample_fmt(container.swr_ctx,
"in_sample_fmt", src_sample_fmt, 0);
// Output keeps the source channel layout and changes only the sample format
// (any output sample-rate option is set in lines not shown here).
277 av_opt_set_int(container.swr_ctx,
"out_channel_layout", src_ch_layout, 0);
279 av_opt_set_sample_fmt(container.swr_ctx,
"out_sample_fmt", dst_sample_fmt, 0);
281 container.bytesPerAudioSample = av_get_bytes_per_sample(src_sample_fmt);
285 audio_diff_avg_count = 0;
// One (initially NULL) buffer pointer per channel.
290 audio_buf = (uint8_t **)calloc(container.audioCodecCtx->channels,
sizeof(uint8_t *));
// Allocate the reusable audio packet; the condition guarding the error log is not visible here.
295 ret = av_new_packet(&audio_packet, 32);
298 VUserLog(
"Error: Couldn't create a packet: %s", av_err2str(ret));
300 container.audioStreamIndex = -1;
303 audio_pkt_data = NULL;
309 audio_diff_threshold = 1./30;
// Finalize the resampler; on failure the audio stream is marked as unavailable.
311 if ((ret = swr_init(container.swr_ctx)) < 0)
313 VUserLog(
"Error: Could not initialize audio converter context. The audio track may be corrupt, or empty.\n%s", av_err2str(ret));
315 container.audioStreamIndex = -1;
// Fills container.videoInfo (and container.audioInfo) by decoding the first frames:
// decodes a video frame, seeks to its pts, decodes again, records first_pts and duration,
// seeks back to first_pts, and then does the analogous bookkeeping for audio.
// Return statements and the declarations of `frame` / `aframe` are not visible in this view.
324 bool VuoFfmpegDecoder::InitializeVideoInfo()
326 if( !DecodeVideoFrame() )
// NOTE(review): the log text below contains a typo ("Coudn't"); left untouched here because it is runtime text.
328 VDebugLog(
"Coudn't find first video frame!");
// Look at the queued frame without removing it, then seek to its pts.
333 videoFrames.Peek(&frame);
336 SeekToPts(frame.pts, NULL);
// Decode again after the seek.
338 if(!DecodeVideoFrame())
340 VDebugLog(
"Couldn't decode first video frame (2)");
344 VideoInfo& vi = container.videoInfo;
345 videoFrames.Peek(&frame);
346 vi.first_pts = frame.pts;
// Use the stream's own duration when it has one; otherwise leave it as "unknown".
348 if(container.videoStream->duration != AV_NOPTS_VALUE)
351 vi.duration = container.videoStream->duration;
353 vi.duration = AV_NOPTS_VALUE;
// last_pts / max_pts start out unknown.
355 vi.last_pts = AV_NOPTS_VALUE;
356 vi.max_pts = AV_NOPTS_VALUE;
// Seek back to the first frame's pts.
358 SeekToPts(vi.first_pts, NULL);
// Audio: a decode failure only produces a warning.
363 if (!DecodeAudioFrame())
364 VUserLog(
"Warning: Couldn't decode the first audio frame.");
366 VideoInfo& ai = container.audioInfo;
// If an audio frame is queued, its pts becomes first_pts.
370 if(audioFrames.Peek(&aframe))
372 ai.first_pts = aframe.pts;
373 ai.last_pts = AV_NOPTS_VALUE;
374 ai.duration = container.audioStream->duration;
// Presumably the no-frame branch (its first_pts assignment, if any, is not visible) — TODO confirm.
379 ai.last_pts = AV_NOPTS_VALUE;
380 ai.duration = container.audioStream->duration;
392 if(mVideoPath != NULL)
397 videoPackets.Clear();
401 if(audio_buf != NULL && audio_buf_size > 0)
403 for(
int i = 0; i < audio_channels; i++)
412 audioPackets.Clear();
415 if(container.formatCtx != NULL) avformat_close_input(&container.formatCtx);
423 return container.audioStreamIndex < 0 || audio_channels > 0;
428 return audio_channels;
// Reads packets from the container with av_read_frame() and queues those belonging to the
// video stream or the audio stream (audio only while audioIsEnabled).
// A read failure other than AVERROR_EOF is logged.
// Return statements and the declarations of `ret`, `packet` and `pkt` are not visible in this view.
434 bool VuoFfmpegDecoder::NextPacket()
439 while((ret = av_read_frame(container.formatCtx, &packet)) >= 0)
// Only packets from the two streams of interest are considered.
441 if( packet.stream_index == container.videoStreamIndex ||
442 packet.stream_index == container.audioStreamIndex )
// Take a new reference in `pkt`; that reference is what gets queued.
445 if (av_packet_ref(&pkt, &packet) < 0)
448 if( packet.stream_index == container.videoStreamIndex )
449 videoPackets.Add(pkt);
// NOTE(review): when the packet is audio but audioIsEnabled is false, neither branch queues `pkt` —
// confirm the lines not shown here release that reference.
450 else if( packet.stream_index == container.audioStreamIndex && audioIsEnabled )
451 audioPackets.Add(pkt);
// Release the packet filled by av_read_frame().
457 av_packet_unref(&packet);
// End of file is expected; anything else is reported.
461 if (ret != AVERROR_EOF)
462 VUserLog(
"Error: %s", av_err2str(ret));
469 VideoFrame queuedFrame;
471 if( mPlaybackRate >= 0 )
473 while(!videoFrames.Shift(&queuedFrame))
475 if(!DecodeVideoFrame())
481 while(!videoFrames.Pop(&queuedFrame))
483 if(!DecodePreceedingVideoFrames())
488 videoFrame->image = queuedFrame.image;
489 videoFrame->timestamp = queuedFrame.timestamp;
490 videoFrame->duration = queuedFrame.duration;
492 lastVideoTimestamp = queuedFrame.timestamp;
493 lastSentVideoPts = queuedFrame.pts;
496 if (audioIsEnabled && !seeking && AudioOffset() < MAX_AUDIO_LATENCY)
498 VDebugLog(
"dup video frame: v: %.3f, a: %.3f => %f", lastVideoTimestamp, lastAudioTimestamp, AudioOffset());
500 videoFrames.Unshift(queuedFrame);
// Advances through queued or decoded video frames until one covers the requested time, optionally
// handing it to the caller through `frame`, and then drains the remaining queued frames.
// `requestedFrameTime` is derived in lines not shown here (presumably from `pts` — TODO confirm).
// Return statements and the opening `do` are not visible in this view.
511 bool VuoFfmpegDecoder::StepVideoFrame(int64_t pts,
VuoVideoFrame *frame)
516 VideoFrame queuedFrame;
// Tolerance added to the requested time in the loop condition below.
518 const double floatingPointError = 0.0001;
// Take the next queued frame, decoding more whenever the queue is empty.
522 while(!videoFrames.Shift(&queuedFrame))
524 if(!DecodeVideoFrame())
529 if (queuedFrame.image)
// NOTE(review): other code in this file calls this function with a NULL `frame`;
// the guard for this dereference must be in lines not shown here — confirm.
535 *frame =
VuoVideoFrame_make(queuedFrame.image, queuedFrame.timestamp, queuedFrame.duration);
540 }
// Keep stepping while the frame ends before the requested time (plus tolerance).
while (queuedFrame.timestamp + queuedFrame.duration < requestedFrameTime + floatingPointError);
// Remember which frame playback is now at.
542 lastVideoTimestamp = queuedFrame.timestamp;
543 lastSentVideoPts = queuedFrame.pts;
// Drain whatever is still queued; what is done with frames that carry an image is not visible here.
545 while(videoFrames.Shift(&queuedFrame))
546 if (queuedFrame.image)
// Advances through queued or decoded audio frames until one with pts >= `pts` is reached,
// records its timestamp, and then empties the audio frame queue.
// Return statements and the opening `do` are not visible in this view.
552 bool VuoFfmpegDecoder::StepAudioFrame(int64_t pts)
554 AudioFrame audioFrame;
// Take the next queued frame, decoding more whenever the queue is empty.
558 while(!audioFrames.Shift(&audioFrame))
560 if(!DecodeAudioFrame())
566 }
// Repeat while the frame is still earlier than the target pts.
while(audioFrame.pts < pts);
568 lastAudioTimestamp = audioFrame.timestamp;
// Shift every remaining frame off the queue (empty loop body).
// NOTE(review): confirm whether the shifted frames' sample buffers need to be released here.
571 while (audioFrames.Shift(&audioFrame));
// Decodes a run of video frames that precede the current position: the existing frame list is
// detached, frames are decoded until the current timestamp is reached again, and the detached
// list is re-attached after the newly decoded frames.
// The seek to `seekTarget`, the declaration of `vframe`, and all return statements are not
// visible in this view.
576 bool VuoFfmpegDecoder::DecodePreceedingVideoFrames()
581 vframe.timestamp = lastVideoTimestamp;
// Checks against the first pts of the video; what each branch does is not visible here.
583 if(videoFrames.Shift(&vframe))
585 if(vframe.pts <= container.videoInfo.first_pts)
593 if(lastSentVideoPts <= container.videoInfo.first_pts)
// Step size subtracted from the current timestamp to get the seek target
// (units presumably seconds, like the timestamps — TODO confirm).
601 const double mReversePlaybackStep = 1.;
603 double currentTimestamp = lastVideoTimestamp;
604 double seekTarget = currentTimestamp - mReversePlaybackStep;
// Detach the current frame list so newly decoded frames start a fresh list.
607 LLNode<VideoFrame>* first = videoFrames.first;
608 LLNode<VideoFrame>* last = videoFrames.last;
609 videoFrames.first = NULL;
610 videoFrames.last = NULL;
615 vframe.timestamp = lastVideoTimestamp;
// Decode forward until the newest decoded frame reaches the timestamp we started from.
617 while(vframe.timestamp < currentTimestamp)
619 if(!DecodeVideoFrame())
624 videoFrames.PeekLast(&vframe);
// Remove and delete the last decoded frame.
627 if(videoFrames.Pop(&vframe))
628 VideoFrame::Delete(&vframe);
// Re-attach the detached list after the newly decoded frames.
// NOTE(review): these lines dereference videoFrames.last and `first`; any NULL guards are not visible here.
633 videoFrames.last->next = first;
634 first->previous = videoFrames.last;
635 videoFrames.last = last;
// Decodes video packets until one complete frame is produced, derives its pts and duration,
// optionally skips it when video lags the audio, and appends the result to videoFrames.
// This view shows both the deprecated avcodec_decode_video2() call and the send/receive API;
// presumably they are selected by preprocessor conditionals not shown here — TODO confirm.
// The declaration of `packet`, the SKIP_VIDEO_FRAME label and the return statements are not visible.
641 bool VuoFfmpegDecoder::DecodeVideoFrame()
643 AVFrame* frame = av_frame_alloc();
644 int frameFinished = 0;
646 av_init_packet(&packet);
// Counter compared against MAX_FRAME_SKIP further down.
647 unsigned int skips = 0;
651 while(!frameFinished)
// When no packet is queued, the lines not shown here decide what to do; one visible path creates a zero-size packet.
653 while(!videoPackets.Shift(&packet))
660 av_new_packet(&packet, 0);
// Deprecated single-call decode, with the deprecation warning silenced.
666 #pragma clang diagnostic push
667 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
668 avcodec_decode_video2(container.videoCodecCtx, frame, &frameFinished, &packet);
669 #pragma clang diagnostic pop
// Send/receive decode path.
671 int ret = avcodec_send_packet(container.videoCodecCtx, &packet);
673 || ret == AVERROR(EAGAIN))
675 ret = avcodec_receive_frame(container.videoCodecCtx, frame);
678 else if (ret == AVERROR(EAGAIN))
682 VUserLog(
"avcodec_receive_frame error: %s", av_err2str(ret));
684 else if (ret == AVERROR(EINVAL))
688 VUserLog(
"avcodec_send_packet error: %s", av_err2str(ret));
// No frame came out of an empty packet.
691 if (frameFinished == 0 && packet.size == 0)
696 VideoInfo& v = container.videoInfo;
// If the last pts is still unknown, fall back to the largest pts seen so far.
699 if( v.last_pts == AV_NOPTS_VALUE && v.max_pts != AV_NOPTS_VALUE )
700 v.last_pts = v.max_pts;
702 av_frame_free(&frame);
707 if( frameFinished && frame != NULL)
710 int64_t pts = frame->best_effort_timestamp;
// A jump of more than 100 packet durations (but below 0x7000000000000000) is treated as a
// timestamp gap: warn once, then estimate the pts from the previous one.
714 int64_t ptsDelta = pts - lastDecodedVideoPts;
715 if (packet.duration > 0
716 && lastDecodedVideoPts > 0
717 && ptsDelta > packet.duration * 100
718 && ptsDelta < 0x7000000000000000)
720 if (!showedTimestampGapWarning)
722 VUserLog(
"Warning: The video stream has a large timestamp gap. Using estimated timestamps instead.");
723 showedTimestampGapWarning =
true;
725 pts = lastDecodedVideoPts + packet.duration;
// When the packet carries no duration, use the distance from the previous pts.
728 int64_t duration = packet.duration == 0 ? pts - lastDecodedVideoPts : packet.duration;
729 lastDecodedVideoPts = pts;
// Track the largest pts decoded so far.
731 if( container.videoInfo.max_pts == AV_NOPTS_VALUE || lastDecodedVideoPts > container.videoInfo.max_pts )
732 container.videoInfo.max_pts = lastDecodedVideoPts;
// Frame skipping: only while not seeking, under the skip limit, and with audio ahead of video.
734 if( skips < MAX_FRAME_SKIP && !seeking && AudioOffset() > MAX_AUDIO_LEAD )
// `predicted_timestamp` is computed in lines not shown here.
740 if( lastAudioTimestamp - predicted_timestamp > MAX_AUDIO_LATENCY )
// Drop this frame: reset the packet and frame, then jump to SKIP_VIDEO_FRAME.
742 av_packet_unref(&packet);
743 av_init_packet(&packet);
744 av_frame_free(&frame);
745 frame = av_frame_alloc();
747 frameFinished =
false;
748 goto SKIP_VIDEO_FRAME;
752 av_packet_unref(&packet);
// Build the queue entry; its initializer fields are not visible in this view.
755 VideoFrame vframe = (VideoFrame)
// Only frames that carry an image are queued.
763 if(vframe.image != NULL)
766 videoFrames.Add(vframe);
767 av_frame_free(&frame);
770 VDebugLog(
"skip frame: v:%f a:%f ==> %f", lastVideoTimestamp, lastAudioTimestamp, AudioOffset());
// Final cleanup of the frame and packet.
777 av_frame_free(&frame);
778 av_packet_unref(&packet);
786 if (!audioIsEnabled || audio->channels == NULL)
792 unsigned int sampleIndex = 0;
794 unsigned int copySize = 0;
797 for(
int i = 0; i < audio_channels; i++)
805 while(sampleIndex < sampleSize)
808 if(audio_buf_index >= audio_buf_size)
810 if( !FillAudioBuffer() )
814 audio->timestamp = lastAudioTimestamp;
817 copySize = audio_buf_size - audio_buf_index;
820 if(copySize + sampleIndex > sampleSize)
821 copySize = sampleSize - sampleIndex;
823 for(
int i = 0; i < audio_channels; i++)
826 memcpy(samples.
samples + sampleIndex/
sizeof(
double), audio_buf[i] + audio_buf_index, copySize);
829 sampleIndex += copySize;
830 audio_buf_index += copySize;
// Frees the per-channel buffers held in audio_buf, but only when audio_buf_size is positive.
// Any resetting of audio_buf, audio_buf_size or the read index happens in lines not shown here.
836 void VuoFfmpegDecoder::ClearAudioBuffer()
839 if(audio_buf_size > 0)
// One buffer per audio channel; NULL entries are skipped.
841 for(
int i = 0; i < audio_channels; i++)
843 if(audio_buf[i] != NULL)
844 free( audio_buf[i] );
// Takes the next audio frame (decoding one when the queue is empty) and makes its samples the
// current audio buffer. Return statements are not visible in this view.
858 bool VuoFfmpegDecoder::FillAudioBuffer()
862 AudioFrame audioFrame;
864 while(!audioFrames.Shift(&audioFrame))
866 if(!DecodeAudioFrame())
// -1 marks "no valid audio timestamp"; AudioOffset() tests for this value.
868 lastAudioTimestamp = -1;
873 lastAudioTimestamp = audioFrame.timestamp;
// audio_buf now points at the frame's sample arrays (no copy is made here).
874 audio_buf = audioFrame.samples;
875 audio_buf_size = audioFrame.size;
// Returns how far the last audio timestamp is ahead of the last video timestamp
// (lastAudioTimestamp - lastVideoTimestamp) when audio is present, enabled, and has a valid
// timestamp (lastAudioTimestamp != -1). The value returned otherwise is not visible in this view.
880 double VuoFfmpegDecoder::AudioOffset()
882 if(
ContainsAudio() && audioIsEnabled && lastAudioTimestamp != -1 )
883 return lastAudioTimestamp - lastVideoTimestamp;
// Decodes the current audio packet, converts the decoded samples to planar doubles with
// swr_convert(), copies them into per-channel buffers and queues an AudioFrame.
// When the current packet is used up, the next one is pulled from audioPackets.
// This view shows both the deprecated avcodec_decode_audio4() call and the send/receive API;
// presumably they are selected by preprocessor conditionals not shown here — TODO confirm.
// Return statements and several declarations (len1, got_frame, dst_data, dst_linesize, packetsSent)
// are not visible.
888 bool VuoFfmpegDecoder::DecodeAudioFrame()
890 AVFrame* frame = av_frame_alloc();
891 container.audioCodecCtx->request_sample_fmt = AV_SAMPLE_FMT_FLTP;
// While seeking no sample storage is allocated; otherwise one pointer per channel.
// NOTE(review): confirm every exit path not shown here frees `samples`.
894 uint8_t** samples = seeking ? NULL : (uint8_t**)malloc(
sizeof(uint8_t*) * audio_channels);
897 int converted_sample_count = 0;
// Work through the bytes remaining in the current packet.
903 while(audio_pkt_size > 0)
// Deprecated single-call decode, with the deprecation warning silenced.
908 #pragma clang diagnostic push
909 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
910 len1 = avcodec_decode_audio4(container.audioCodecCtx, frame, &got_frame, &audio_packet);
911 #pragma clang diagnostic pop
// Send/receive decode path; here the whole packet counts as consumed.
913 int ret = avcodec_send_packet(container.audioCodecCtx, &audio_packet);
916 || ret == AVERROR(EAGAIN))
918 ret = avcodec_receive_frame(container.audioCodecCtx, frame);
922 len1 = audio_packet.size;
924 else if (ret == AVERROR(EAGAIN))
933 VUserLog(
"avcodec_receive_frame error: %s", av_err2str(ret));
935 else if (ret == AVERROR(EINVAL))
940 VUserLog(
"avcodec_send_packet error: %s", av_err2str(ret));
// Invalid data after more than 5 sent packets is handled specially (handling not visible here).
943 if (ret == AVERROR_INVALIDDATA && packetsSent > 5)
// Advance past the bytes the decoder consumed.
955 audio_pkt_data += len1;
956 audio_pkt_size -= len1;
960 int64_t pts = frame->best_effort_timestamp;
// A frame is queued here without conversion; its initializer and the condition selecting this
// path are not visible (presumably the seeking case — TODO confirm).
964 AudioFrame audioFrame = {
972 audioFrames.Add(audioFrame);
974 av_frame_free(&frame);
978 lastDecodedAudioPts = pts;
// Size and allocate the destination buffers for conversion (the rest of the sizing expression is not visible).
985 int dst_nb_samples = av_rescale_rnd(swr_get_delay(container.swr_ctx, container.audioCodecCtx->sample_rate) +
989 int ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, audio_channels, dst_nb_samples, AV_SAMPLE_FMT_DBLP, 0);
993 VUserLog(
"av_samples_alloc_array_and_samples error: %s", av_err2str(ret));
// Convert the decoded frame into dst_data.
// NOTE(review): a negative `ret` is logged and then assigned to converted_sample_count;
// confirm the lines not shown here bail out before it is used as a size.
1001 ret = swr_convert(container.swr_ctx, dst_data, dst_nb_samples, (
const uint8_t **)frame->data, frame->nb_samples);
1002 if(ret < 0)
VUserLog(
"Failed conversion!");
1004 converted_sample_count = ret;
// Copy each channel's converted data into its own heap buffer of dst_linesize bytes.
1012 for(
int i = 0; i < audio_channels; i++)
1014 samples[i] = (uint8_t*)malloc(dst_linesize);
1015 memcpy(samples[i], dst_data[i], dst_linesize);
1018 av_frame_free(&frame);
// Release the conversion buffers: first the sample data, then the pointer array.
1022 av_freep(&dst_data[0]);
1023 av_freep(&dst_data);
// Queue the converted frame; the visible initializer value is the byte count of the converted doubles.
1026 AudioFrame audioFrame = {
1027 (
unsigned int)(converted_sample_count *
sizeof(
double)),
1034 audioFrames.Add(audioFrame);
// Current packet exhausted: release it and fetch the next one from the queue.
1040 if(audio_pkt_data != NULL)
1041 av_packet_unref(&audio_packet);
1043 while( !audioPackets.Shift(&audio_packet) )
1047 audio_pkt_data = NULL;
// Start consuming the freshly shifted packet.
1053 audio_pkt_data = audio_packet.data;
1054 audio_pkt_size = audio_packet.size;
1062 SeekToPts(pts, frame);
// Seeks to `pts`: clears the packet/frame queues and flushes the codecs, calls av_seek_frame()
// on the video stream, then steps the video (and, when enabled, the audio) forward to the target.
// `frame` is passed through to StepVideoFrame(); other code in this file passes NULL for it.
// Inputs flagged AVFMT_NOFILE get a one-time "Ignoring seeks" warning instead.
// The declaration of `ret` and any early returns are not visible in this view.
1070 void VuoFfmpegDecoder::SeekToPts(int64_t pts,
VuoVideoFrame *frame)
1072 int64_t target_pts = pts;
// Drop everything queued for video and reset the video decoder's internal state.
1075 videoPackets.Clear();
1076 videoFrames.Clear();
1078 avcodec_flush_buffers(container.videoCodecCtx);
// Same for audio. The condition guarding this section is not visible.
// NOTE(review): confirm audioCodecCtx cannot be NULL on this path.
1083 audioPackets.Clear();
1084 audioFrames.Clear();
1085 avcodec_flush_buffers(container.audioCodecCtx);
// Inputs with AVFMT_NOFILE are treated as streams: warn once.
1090 if (container.formatCtx->iformat->flags & AVFMT_NOFILE)
1092 if (!showedSeekIgnoredWarning)
1094 VUserLog(
"Warning: Ignoring seeks, since this is a stream (not a file).");
1095 showedSeekIgnoredWarning =
true;
// AVSEEK_FLAG_BACKWARD is passed to the seek; StepVideoFrame() below then advances to the target.
1099 ret = av_seek_frame(container.formatCtx, container.videoStreamIndex, target_pts, AVSEEK_FLAG_BACKWARD);
1102 VDebugLog(
"Warning: av_seek_frame() failed: %s", av_err2str(ret));
1109 lastSentVideoPts = target_pts;
// Step the video forward to the requested pts.
1112 if (!StepVideoFrame(pts, frame))
1113 VUserLog(
"Warning: Couldn't seek video.");
// Convert the target from the video stream's time base to the audio stream's time base.
// NOTE(review): this reads container.audioStream before the audioIsEnabled check below —
// confirm a guard in the lines not shown here.
1117 int64_t audioPts = av_rescale_q(pts, container.videoStream->time_base, container.audioStream->time_base);
1119 if( audioIsEnabled )
1120 if (!StepAudioFrame(audioPts))
1121 VUserLog(
"Warning: Couldn't seek audio.");
1129 return audio_channels > 0;
1134 int64_t duration = container.videoInfo.duration;
1136 if(duration == AV_NOPTS_VALUE)
1138 if(container.videoInfo.last_pts == AV_NOPTS_VALUE)
1140 if (container.formatCtx->iformat->flags & AVFMT_NOFILE)
1144 container.videoInfo.duration = container.formatCtx->duration;
1151 if (!StepVideoFrame(INT64_MAX, NULL))
1152 VUserLog(
"Warning: Couldn't seek to end of video.");
1154 container.videoInfo.duration = container.videoInfo.last_pts - container.videoInfo.first_pts;
1169 bool audioWasEnabled = audioIsEnabled;
1170 audioIsEnabled = (
ContainsAudio() && fabs(rate - 1.) < .00001);
1172 if( (!audioWasEnabled && audioIsEnabled) || rate > 0 != mPlaybackRate > 0 )
1174 mPlaybackRate = rate;
1179 mPlaybackRate = rate;
1185 return lastVideoTimestamp;
1190 return av_q2d(container.videoStream->avg_frame_rate);