Vuo  2.1.1
VuoFfmpegDecoder.cc
#include "VuoFfmpegDecoder.h"
#include <OpenGL/CGLMacro.h>
#include "VuoFfmpegUtility.h"

extern "C"
{

#ifdef VUO_COMPILER
VuoModuleMetadata({
	"title" : "VuoFfmpegDecoder",
	"dependencies" : [
		"VuoImage",
		"VuoAudioFrame",
		"VuoAudioSamples",
		"VuoReal",
		"VuoList_VuoAudioSamples",
		"VuoList_VuoReal",
		"avcodec",
		"avformat",
		"avutil",
		"swscale",
		"swresample"
	]
});
#endif
}

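// Number of measurements used by the audio-sync averaging filter (see audio_diff_avg_coef below).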
#define AUDIO_DIFF_AVG_NB 20

VuoFfmpegDecoder::VuoFfmpegDecoder(VuoUrl url)
{
	static dispatch_once_t pred;
	dispatch_once(&pred, ^
	{
		avformat_network_init();

		av_log_set_level(VuoIsDebugEnabled() ? AV_LOG_VERBOSE : AV_LOG_FATAL);
	});

	mPlaybackRate = 1.;
	mVideoPath = url;
	VuoRetain(mVideoPath);
}

VuoFfmpegDecoder* VuoFfmpegDecoder::Create(VuoUrl url)
{
	VuoFfmpegDecoder* dec = new VuoFfmpegDecoder(url);

	if(!dec->Initialize())
	{
		delete dec;
		return NULL;
	}
	else
	{
		dec->isReady = true;
		return dec;
	}
}

bool VuoFfmpegDecoder::Initialize()
{
	container.formatCtx = NULL;
	container.videoCodecCtx = NULL;
	container.audioCodecCtx = NULL;
	audio_buf = NULL;
	seeking = false;

	// these will be set with real values in InitializeVideoInfo()
	lastVideoTimestamp = 0;
	lastSentVideoPts = 0;
	lastAudioTimestamp = 0;
	showedTimestampGapWarning = false;
	showedSeekIgnoredWarning = false;

	VDebugLog("Initialize: %s", mVideoPath);

	AVDictionary *opts = 0;
	av_dict_set(&opts, "rtsp_transport", "tcp", 0);
	int ret = avformat_open_input(&(container.formatCtx), mVideoPath, NULL, &opts);

	// If opening the full normalized URL failed, try again with just the POSIX path.
	// (FFmpeg doesn't seem to like percent-encoded `file:///` URLs.)
	if (ret == AVERROR(ENOENT))
	{
		VuoText path = VuoUrl_getPosixPath(mVideoPath);
		if (path)
		{
			VuoLocal(path);
			ret = avformat_open_input(&(container.formatCtx), path, NULL, &opts);
		}
	}

	if (ret != 0 || !container.formatCtx)
	{
		VUserLog("Error: FFmpeg could not open \"%s\" — %s", mVideoPath, av_err2str(ret));
		return false;
	}

	VDebugLog("FFmpeg context flags: 0x%x", container.formatCtx->ctx_flags);
	if (container.formatCtx->iformat)
		VDebugLog("FFmpeg input format : '%s' (%s) flags=0x%x codec=0x%x",
			container.formatCtx->iformat->long_name,
			container.formatCtx->iformat->name,
			container.formatCtx->iformat->flags,
			container.formatCtx->iformat->raw_codec_id);

	// Load video context
	ret = avformat_find_stream_info(container.formatCtx, NULL);
	if (ret < 0)
		VUserLog("Warning: FFmpeg could not find video stream information in \"%s\" — %s.", mVideoPath, av_err2str(ret));

	container.videoStreamIndex = VuoFfmpegUtility::FirstStreamIndexWithMediaType(container.formatCtx, AVMEDIA_TYPE_VIDEO);
	container.audioStreamIndex = VuoFfmpegUtility::FirstStreamIndexWithMediaType(container.formatCtx, AVMEDIA_TYPE_AUDIO);

	if(!InitializeVideo(container))
	{
		VUserLog("Failed initializing video stream.");
		return false;
	}

	// don't care if audio initializes or not, since video can still play without audio
	if( !InitializeAudio(container) )
		audioIsEnabled = false;

	// Set metadata
	if( !InitializeVideoInfo() )
	{
		VUserLog("Error: FFmpeg failed to decode the first video frame.");
		return false;
	}

	// VLog("Duration: %f", GetDuration());
	// VLog("Framerate: %f", av_q2d(container.videoStream->avg_frame_rate));

	return true;
}

bool VuoFfmpegDecoder::InitializeVideo(VuoFfmpegDecoder::AVContainer& container)
{
	if(container.videoStreamIndex < 0)
	{
		VUserLog("Error: FFmpeg could not find a video stream in file \"%s\".", mVideoPath);
		return false;
	}

	container.videoStream = container.formatCtx->streams[container.videoStreamIndex];
	// container.videoCodecCtx->thread_count = 1;

	AVCodec* videoCodec = avcodec_find_decoder(container.videoStream->codecpar->codec_id);

	if(videoCodec == NULL)
	{
		VUserLog("Error: FFmpeg could not find a suitable decoder for file \"%s\".", mVideoPath);
		return false;
	}

	VDebugLog("FFmpeg video codec : '%s' (%s)",
		videoCodec->long_name,
		videoCodec->name);

	container.videoCodecCtx = avcodec_alloc_context3(videoCodec);
	if (!container.videoCodecCtx)
	{
		VUserLog("Error: FFmpeg could not allocate the decoder context for \"%s\".", mVideoPath);
		return false;
	}
	if (avcodec_parameters_to_context(container.videoCodecCtx, container.videoStream->codecpar) < 0)
	{
		VUserLog("Error: FFmpeg could not copy the codec parameters for \"%s\".", mVideoPath);
		return false;
	}


	// Open video packet queue
	videoPackets.destructor = av_packet_unref;
	videoFrames.destructor = VideoFrame::Delete;

	// this will be set in the Initialize() function after audio is also loaded
	lastDecodedVideoPts = 0;
	lastSentVideoPts = 0;
	lastVideoTimestamp = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, lastDecodedVideoPts);

	// Flash can't seek, so when seeking just jump to 0 and step
	if(videoCodec->id == AV_CODEC_ID_FLV1 || videoCodec->id == AV_CODEC_ID_GIF)
		container.seekUnavailable = true;

	// Open codec
	if(avcodec_open2(container.videoCodecCtx, videoCodec, NULL) < 0)
	{
		VUserLog("Error: FFmpeg could not open the codec for \"%s\".", mVideoPath);
		return false;
	}

	return true;
}

bool VuoFfmpegDecoder::InitializeAudio(VuoFfmpegDecoder::AVContainer& container)
{
	if(container.audioStreamIndex < 0)
	{
		audio_channels = 0;
		return false;
	}

	audioPackets.destructor = av_packet_unref;
	audioFrames.destructor = AudioFrame::Delete;

	// And the audio stream (if applicable)
	AVCodec *audioCodec = NULL;

	container.audioStream = container.formatCtx->streams[container.audioStreamIndex];
	// container.audioCodecCtx->thread_count = 1;

	audioCodec = avcodec_find_decoder(container.audioStream->codecpar->codec_id);

	container.audioCodecCtx = avcodec_alloc_context3(audioCodec);
	if (!container.audioCodecCtx)
	{
		VUserLog("Error: FFmpeg could not allocate the decoder context for \"%s\".", mVideoPath);
		return false;
	}
	if (avcodec_parameters_to_context(container.audioCodecCtx, container.audioStream->codecpar) < 0)
	{
		VUserLog("Error: FFmpeg could not copy the codec parameters for \"%s\".", mVideoPath);
		return false;
	}

	int ret = -1;
	if (audioCodec == NULL || (ret = avcodec_open2(container.audioCodecCtx, audioCodec, NULL)) < 0)
	{
		VUserLog("Error: Unsupported audio codec %s: %s", avcodec_get_name(container.audioCodecCtx->codec_id), av_err2str(ret));
		// container.audioStreamIndex = -1;
		audio_channels = 0;
		return false;
	}
	else
	{
		VDebugLog("FFmpeg audio codec : '%s' (%s)",
			audioCodec->long_name,
			audioCodec->name);

		container.swr_ctx = swr_alloc();

		if (!container.swr_ctx)
		{
			VUserLog("Error: FFmpeg could not allocate resampler context.");
			container.audioStreamIndex = -1;
			audio_channels = 0;
			return false;
		}
		else
		{
			/* set output resample options */
			int src_ch_layout = container.audioCodecCtx->channel_layout;
			int src_rate = container.audioCodecCtx->sample_rate;
			audio_channels = container.audioCodecCtx->channels;
			AVSampleFormat src_sample_fmt = container.audioCodecCtx->sample_fmt;

			// we want planar doubles
			AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_DBLP;

			av_opt_set_int(container.swr_ctx, "in_channel_layout", src_ch_layout, 0);
			av_opt_set_int(container.swr_ctx, "in_sample_rate", src_rate, 0);
			av_opt_set_sample_fmt(container.swr_ctx, "in_sample_fmt", src_sample_fmt, 0);

			av_opt_set_int(container.swr_ctx, "out_channel_layout", src_ch_layout, 0);
			av_opt_set_int(container.swr_ctx, "out_sample_rate", VuoAudioSamples_sampleRate, 0);
			av_opt_set_sample_fmt(container.swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

			container.bytesPerAudioSample = av_get_bytes_per_sample(src_sample_fmt);

			/* averaging filter for audio sync */
			audio_diff_avg_coef = exp(log(0.01 / AUDIO_DIFF_AVG_NB));
			audio_diff_avg_count = 0;
			audio_diff_cum = .0;

			// Initialize audio_buffer that is used to store data straight from frames, and before
			// they're loaded into VuoAudioSamples arrays.
			audio_buf = (uint8_t **)calloc(container.audioCodecCtx->channels, sizeof(uint8_t *));
			audio_buf_size = 0;
			audio_buf_index = 0;

			// sets default properties and lets ffmpeg manage data
			ret = av_new_packet(&audio_packet, 32);
			if (ret)
			{
				VUserLog("Error: Couldn't create a packet: %s", av_err2str(ret));
				audio_channels = 0;
				container.audioStreamIndex = -1;
				return false;
			}
			audio_pkt_data = NULL;
			audio_pkt_size = 0;
			audioIsEnabled = VuoReal_areEqual(mPlaybackRate, 1.);

			// use 1/30 because humans can't perceive sync differences smaller than this threshold
			// 2.0 * 512 / VuoAudioSamples_sampleRate; //container.audioCodecCtx->sample_rate;
			audio_diff_threshold = 1./30;

			if ((ret = swr_init(container.swr_ctx)) < 0)
			{
				VUserLog("Error: Could not initialize audio converter context. The audio track may be corrupt, or empty.\n%s", av_err2str(ret));
				audio_channels = 0;
				container.audioStreamIndex = -1;
				return false;
			}
		}

		return true;
	}
}

bool VuoFfmpegDecoder::InitializeVideoInfo()
{
	if( !DecodeVideoFrame() )
	{
		VDebugLog("Couldn't find first video frame!");
		return false;
	}

	VideoFrame frame;
	videoFrames.Peek(&frame);

	// first frame is sometimes incorrect - seek to re-orient then test timestamps
	SeekToPts(frame.pts, NULL);

	if(!DecodeVideoFrame())
	{
		VDebugLog("Couldn't decode first video frame (2)");
		return false;
	}

	VideoInfo& vi = container.videoInfo;
	videoFrames.Peek(&frame);
	vi.first_pts = frame.pts;

	if(container.videoStream->duration != AV_NOPTS_VALUE)
		// will be just an estimate until NextVideoFrame gets
		// called to exhaustion
		vi.duration = container.videoStream->duration;
	else
		vi.duration = AV_NOPTS_VALUE;

	vi.last_pts = AV_NOPTS_VALUE;
	vi.max_pts = AV_NOPTS_VALUE;

	SeekToPts(vi.first_pts, NULL);


	if(ContainsAudio())
	{
		if (!DecodeAudioFrame())
			VUserLog("Warning: Couldn't decode the first audio frame.");

		VideoInfo& ai = container.audioInfo;

		AudioFrame aframe;

		if(audioFrames.Peek(&aframe))
		{
			ai.first_pts = aframe.pts;
			ai.last_pts = AV_NOPTS_VALUE;
			ai.duration = container.audioStream->duration;
		}
		else
		{
			ai.first_pts = 0;//AV_NOPTS_VALUE;
			ai.last_pts = AV_NOPTS_VALUE;
			ai.duration = container.audioStream->duration;
		}
	}

	lastVideoTimestamp = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, container.videoInfo.first_pts);
	lastAudioTimestamp = ContainsAudio() ? VuoFfmpegUtility::AvTimeToSecond(container.audioStream, container.audioInfo.first_pts) : -1;

	return true;
}

VuoFfmpegDecoder::~VuoFfmpegDecoder()
{
	if(mVideoPath != NULL)
		VuoRelease(mVideoPath);

	// empty frames
	videoFrames.Clear();
	videoPackets.Clear();

	if(ContainsAudio())
	{
		if(audio_buf != NULL && audio_buf_size > 0)
		{
			for(int i = 0; i < audio_channels; i++)
			{
				free(audio_buf[i]);
			}

			free(audio_buf);
		}

		audioFrames.Clear();
		audioPackets.Clear();
	}

	if(container.formatCtx != NULL) avformat_close_input(&container.formatCtx);
}

bool VuoFfmpegDecoder::CanPlayAudio()
{
	return container.audioStreamIndex < 0 || audio_channels > 0;
}

unsigned int VuoFfmpegDecoder::GetAudioChannelCount()
{
	return audio_channels;
}

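// Reads the next packet from the container and appends it to the matching video or audio packet queue.
// Returns false at end of file or on a read error.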
bool VuoFfmpegDecoder::NextPacket()
{
	AVPacket packet;

	int ret;
	while((ret = av_read_frame(container.formatCtx, &packet)) >= 0)
	{
		if( packet.stream_index == container.videoStreamIndex ||
			packet.stream_index == container.audioStreamIndex )
		{
			AVPacket pkt;
			if (av_packet_ref(&pkt, &packet) < 0)
				continue;

			if( packet.stream_index == container.videoStreamIndex )
				videoPackets.Add(pkt);
			else if( packet.stream_index == container.audioStreamIndex && audioIsEnabled )
				audioPackets.Add(pkt);

			return true;
		}
		else
		{
			av_packet_unref(&packet);
		}
	}

	if (ret != AVERROR_EOF)
		VUserLog("Error: %s", av_err2str(ret));

	return false;
}

bool VuoFfmpegDecoder::NextVideoFrame(VuoVideoFrame* videoFrame)
{
	VideoFrame queuedFrame;

	if( mPlaybackRate >= 0 )
	{
		while(!videoFrames.Shift(&queuedFrame))
		{
			if(!DecodeVideoFrame())
				return false;
		}
	}
	else
	{
		while(!videoFrames.Pop(&queuedFrame))
		{
			if(!DecodePreceedingVideoFrames())
				return false;
		}
	}

	videoFrame->image = queuedFrame.image;
	videoFrame->timestamp = queuedFrame.timestamp;
	videoFrame->duration = queuedFrame.duration;

	lastVideoTimestamp = queuedFrame.timestamp;
	lastSentVideoPts = queuedFrame.pts;

	// if the audio is behind video, put this last frame back in the front of the queue.
	if (audioIsEnabled && !seeking && AudioOffset() < MAX_AUDIO_LATENCY)
	{
		VDebugLog("dup video frame: v: %.3f, a: %.3f => %f", lastVideoTimestamp, lastAudioTimestamp, AudioOffset());
		VuoRetain(queuedFrame.image);
		videoFrames.Unshift(queuedFrame);
	}

	return true;
}

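// Decodes forward until reaching the frame whose time span covers `pts`.
// If `frame` is non-NULL it receives that frame's image; all other decoded images are released.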
bool VuoFfmpegDecoder::StepVideoFrame(int64_t pts, VuoVideoFrame *frame)
{
	if (frame)
		frame->image = NULL;

	VideoFrame queuedFrame;

	const double floatingPointError = 0.0001;
	double requestedFrameTime = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, pts);
	do
	{
		while(!videoFrames.Shift(&queuedFrame))
		{
			if(!DecodeVideoFrame())
			{
				return false;
			}
		}
		if (queuedFrame.image)
		{
			if (frame)
			{
				if (frame->image)
					VuoRelease(frame->image);
				*frame = VuoVideoFrame_make(queuedFrame.image, queuedFrame.timestamp, queuedFrame.duration);
			}
			else
				VuoRelease(queuedFrame.image);
		}
	} while (queuedFrame.timestamp + queuedFrame.duration < requestedFrameTime + floatingPointError);

	lastVideoTimestamp = queuedFrame.timestamp;
	lastSentVideoPts = queuedFrame.pts;

	while(videoFrames.Shift(&queuedFrame))
		if (queuedFrame.image)
			VuoRelease(queuedFrame.image);

	return true;
}

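// Decodes and discards audio frames until reaching the frame at or after `pts`, updating lastAudioTimestamp.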
bool VuoFfmpegDecoder::StepAudioFrame(int64_t pts)
{
	AudioFrame audioFrame;

	do
	{
		while(!audioFrames.Shift(&audioFrame))
		{
			if(!DecodeAudioFrame())
				return false;
		}

		// don't Delete frames when seeking since decode isn't actually allocating anything
		// AudioFrame::Delete(&audioFrame);
	} while(audioFrame.pts < pts);

	lastAudioTimestamp = audioFrame.timestamp;

	// flush whatever's left - shouldn't be much
	while (audioFrames.Shift(&audioFrame));

	return true;
}

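// Used for reverse playback: seeks back about a second, decodes forward to just before the
// current frame, and prepends those frames to the frame queue.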
bool VuoFfmpegDecoder::DecodePreceedingVideoFrames()
{
	// check that we're not already at the beginning
	// populate the video frame queue
	VideoFrame vframe;
	vframe.timestamp = lastVideoTimestamp;

	if(videoFrames.Shift(&vframe))
	{
		if(vframe.pts <= container.videoInfo.first_pts)
		{
			VDebugLog("current frame < 0");
			return false;
		}
	}
	else
	{
		if(lastSentVideoPts <= container.videoInfo.first_pts)
		{
			VDebugLog("current frame < 0");
			return false;
		}
	}

	const double mReversePlaybackStep = 1.;

	double currentTimestamp = lastVideoTimestamp;
	double seekTarget = currentTimestamp - mReversePlaybackStep;

	// if there are already frames in the queue, this preserves the order
	LLNode<VideoFrame>* first = videoFrames.first;
	LLNode<VideoFrame>* last = videoFrames.last;
	videoFrames.first = NULL;
	videoFrames.last = NULL;

	if( !SeekToSecond(seekTarget, NULL) )
		return false;

	vframe.timestamp = lastVideoTimestamp;

	while(vframe.timestamp < currentTimestamp)
	{
		if(!DecodeVideoFrame())
		{
			break;
		}

		videoFrames.PeekLast(&vframe);
	}

	if(videoFrames.Pop(&vframe))
		VideoFrame::Delete(&vframe);

	// append original frames to frames list
	if(first != NULL)
	{
		videoFrames.last->next = first;
		first->previous = videoFrames.last;
		videoFrames.last = last;
	}

	return true;
}

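// Decodes the next video frame into the videoFrames queue, skipping frames when video lags too far behind the audio track.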
bool VuoFfmpegDecoder::DecodeVideoFrame()
{
	AVFrame* frame = av_frame_alloc();
	int frameFinished = 0;
	AVPacket packet;
	av_init_packet(&packet);
	unsigned int skips = 0;

SKIP_VIDEO_FRAME:

	while(!frameFinished)
	{
		while(!videoPackets.Shift(&packet))
		{
			if(!NextPacket())
			{
				// https://b33p.net/kosada/node/12217
				// No next packet is available, but FFmpeg may still have a few frames in its internal buffer.
				// So instead of giving up now, call avcodec_decode_video2() with an empty packet to flush it.
				av_new_packet(&packet, 0);
				break;
			}
		}

#if 1
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
		avcodec_decode_video2(container.videoCodecCtx, frame, &frameFinished, &packet);
#pragma clang diagnostic pop
#else
		int ret = avcodec_send_packet(container.videoCodecCtx, &packet);
		if (ret == 0
			|| ret == AVERROR(EAGAIN))
		{
			ret = avcodec_receive_frame(container.videoCodecCtx, frame);
			if (ret == 0)
				frameFinished = 1;
			else if (ret == AVERROR(EAGAIN))
				// FFmpeg is telling us we need to send more packets to it before it can decode the frame.
				;
			else
				VUserLog("avcodec_receive_frame error: %s", av_err2str(ret));
		}
		else if (ret == AVERROR(EINVAL))
			// FFmpeg is telling us we need to send more packets to it before it can decode the frame.
			;
		else
			VUserLog("avcodec_send_packet error: %s", av_err2str(ret));
#endif

		if (frameFinished == 0 && packet.size == 0)
		{
			// After we fed it an empty packet, FFmpeg says it doesn't have a frame for us,
			// so decoding is maybe actually finished now.

			VideoInfo& v = container.videoInfo;

			// out of video packets. last_pts is now accurate.
			if( v.last_pts == AV_NOPTS_VALUE && v.max_pts != AV_NOPTS_VALUE )
				v.last_pts = v.max_pts;

			av_frame_free(&frame);
			return false;
		}
	}

	if( frameFinished && frame != NULL)
	{
		// Get PTS here because formats with predictive frames can return junk values before a full frame is found
		int64_t pts = frame->best_effort_timestamp;

		// For unknown reasons, FFmpeg sometimes returns large PTS gaps when playing an RTSP stream.
		// https://b33p.net/kosada/node/13972
		int64_t ptsDelta = pts - lastDecodedVideoPts;
		if (packet.duration > 0
			&& lastDecodedVideoPts > 0
			&& ptsDelta > packet.duration * 100
			&& ptsDelta < 0x7000000000000000) // Don't apply this workaround during preroll, which commonly has bogus timestamps.
		{
			if (!showedTimestampGapWarning)
			{
				VUserLog("Warning: The video stream has a large timestamp gap. Using estimated timestamps instead.");
				showedTimestampGapWarning = true;
			}
			pts = lastDecodedVideoPts + packet.duration;
		}

		int64_t duration = packet.duration == 0 ? pts - lastDecodedVideoPts : packet.duration;
		lastDecodedVideoPts = pts;

		if( container.videoInfo.max_pts == AV_NOPTS_VALUE || lastDecodedVideoPts > container.videoInfo.max_pts )
			container.videoInfo.max_pts = lastDecodedVideoPts;

		if( skips < MAX_FRAME_SKIP && !seeking && AudioOffset() > MAX_AUDIO_LEAD )
		{
			double predicted_timestamp = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, pts + duration);

			// don't skip a frame if we're just going to drop it in the next function - this can
			// happen if the video frame duration is greater than (abs(MAX_AUDIO_LATENCY) + MAX_AUDIO_LEAD).
			if( lastAudioTimestamp - predicted_timestamp > MAX_AUDIO_LATENCY )
			{
				av_packet_unref(&packet);
				av_init_packet(&packet);
				av_frame_free(&frame);
				frame = av_frame_alloc();
				skips++; // don't skip more than MAX_FRAME_SKIP frames per decode
				frameFinished = false;
				goto SKIP_VIDEO_FRAME;
			}
		}

		av_packet_unref(&packet);

		// if seeking and going forward in time, it's okay to skip decoding the image
		VideoFrame vframe = (VideoFrame)
		{
			seeking ? NULL : VuoFfmpegUtility::VuoImageWithAVFrame(container.videoCodecCtx, frame),
			pts,
			VuoFfmpegUtility::AvTimeToSecond(container.videoStream, pts), // the first video timestamp may not be zero!
			VuoFfmpegUtility::AvTimeToSecond(container.videoStream, duration)
		};

		if(vframe.image != NULL)
			VuoRetain(vframe.image);

		videoFrames.Add(vframe);
		av_frame_free(&frame);

		if(skips > 0)
			VDebugLog("skip frame: v:%f a:%f ==> %f", lastVideoTimestamp, lastAudioTimestamp, AudioOffset());

		return true;
	}
	else
	{
		if (frame)
			av_frame_free(&frame);
		av_packet_unref(&packet);
	}

	return false;
}

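// Fills `audio` with one VuoAudioSamples buffer per channel, pulling more decoded frames as needed.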
bool VuoFfmpegDecoder::NextAudioFrame(VuoAudioFrame* audio)
{
	if (!audioIsEnabled || audio->channels == NULL)
		return false;

	// the number of bytes needed to fill each audio channel
	unsigned int sampleSize = VuoAudioSamples_bufferSize * sizeof(double);
	// the current index (in bytes) in the audio samples array that is being appended to from audio_buf
	unsigned int sampleIndex = 0;
	// the size in bytes to copy from each audio_buf channel to audio samples at sampleIndex
	unsigned int copySize = 0;

	// Allocate audio sample vectors.
	for(int i = 0; i < audio_channels; i++)
	{
		VuoAudioSamples samples = VuoAudioSamples_alloc(VuoAudioSamples_bufferSize);
		samples.samplesPerSecond = VuoAudioSamples_sampleRate;
		VuoListAppendValue_VuoAudioSamples(audio->channels, samples);
	}

	// while audio needs more samples to fill
	while(sampleIndex < sampleSize)
	{
		// if the audio_buffer is out of samples, decode more
		if(audio_buf_index >= audio_buf_size)
		{
			if( !FillAudioBuffer() )
				return false;
		}

		audio->timestamp = lastAudioTimestamp;

		// now copy from audio_buf to audio.sampleSize
		copySize = audio_buf_size - audio_buf_index;

		// if audio_buf has too many samples to fit in audio.samples, just copy til audio is full and return
		if(copySize + sampleIndex > sampleSize)
			copySize = sampleSize - sampleIndex;

		for(int i = 0; i < audio_channels; i++)
		{
			VuoAudioSamples samples = VuoListGetValue_VuoAudioSamples(audio->channels, i+1);
			memcpy(samples.samples + sampleIndex/sizeof(double), audio_buf[i] + audio_buf_index, copySize);
		}

		sampleIndex += copySize;
		audio_buf_index += copySize;
	}

	return true;
}

void VuoFfmpegDecoder::ClearAudioBuffer()
{
	// first order of business is to free the old buffer
	if(audio_buf_size > 0)
	{
		for(int i = 0; i < audio_channels; i++)
		{
			if(audio_buf[i] != NULL)
				free( audio_buf[i] );
		}
	}
	if (audio_buf)
	{
		free(audio_buf);
		audio_buf = NULL;
	}

	// now reset the audio_buf_index and audio_buf_size to match the new decoded frame
	audio_buf_index = 0;
	audio_buf_size = 0;
}

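// Replaces audio_buf with the sample planes of the next decoded audio frame.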
bool VuoFfmpegDecoder::FillAudioBuffer()
{
	ClearAudioBuffer();

	AudioFrame audioFrame;

	while(!audioFrames.Shift(&audioFrame))
	{
		if(!DecodeAudioFrame())
		{
			lastAudioTimestamp = -1;//audioFrame.timestamp;
			return false;
		}
	}

	lastAudioTimestamp = audioFrame.timestamp;
	audio_buf = audioFrame.samples;
	audio_buf_size = audioFrame.size;

	return true;
}

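// Returns how many seconds the most recently decoded audio timestamp is ahead of the most
// recently decoded video timestamp (0 if audio is absent or disabled).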
double VuoFfmpegDecoder::AudioOffset()
{
	if( ContainsAudio() && audioIsEnabled && lastAudioTimestamp != -1 )
		return lastAudioTimestamp - lastVideoTimestamp;
	else
		return 0.;
}

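// Decodes the next audio frame, resamples it to planar doubles at VuoAudioSamples_sampleRate,
// and appends it to the audioFrames queue.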
bool VuoFfmpegDecoder::DecodeAudioFrame()
{
	AVFrame* frame = av_frame_alloc();
	container.audioCodecCtx->request_sample_fmt = AV_SAMPLE_FMT_FLTP;

	// don't bother allocating samples array if seeking since we're just decoding packets for timestamps
	uint8_t** samples = seeking ? NULL : (uint8_t**)malloc(sizeof(uint8_t*) * audio_channels);

	int len1;
	int converted_sample_count = 0;

	for(;;)
	{
		// int packetsSent = 0;

		while(audio_pkt_size > 0)
		{
			int got_frame = 0;

#if 1
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
			len1 = avcodec_decode_audio4(container.audioCodecCtx, frame, &got_frame, &audio_packet);
#pragma clang diagnostic pop
#else
			int ret = avcodec_send_packet(container.audioCodecCtx, &audio_packet);
			++packetsSent;
			if (ret == 0
				|| ret == AVERROR(EAGAIN))
			{
				ret = avcodec_receive_frame(container.audioCodecCtx, frame);
				if (ret == 0)
				{
					got_frame = 1;
					len1 = audio_packet.size;
				}
				else if (ret == AVERROR(EAGAIN))
				{
					// FFmpeg is telling us we need to send more packets to it before it can decode the frame.
					// But sometimes it's never satiated, so give up after we've tried feeding it a few times.
					// Fixes hang in `TestVuoVideo::testDecodePerformance(MPEG v4 HE AAC opt=1)`
					if (packetsSent > 5)
						return false;
				}
				else
					VUserLog("avcodec_receive_frame error: %s", av_err2str(ret));
			}
			else if (ret == AVERROR(EINVAL))
				// FFmpeg is telling us we need to send more packets to it before it can decode the frame.
				;
			else
			{
				VUserLog("avcodec_send_packet error: %s", av_err2str(ret));
				// Sometimes, after seeking, just the first few packets are invalid,
				// or all the packets are invalid. Give up after a few tries.
				if (ret == AVERROR_INVALIDDATA && packetsSent > 5)
					return false;
			}
#endif

			/* if error, skip frame */
			if(len1 < 0)
			{
				audio_pkt_size = 0;
				break;
			}

			audio_pkt_data += len1;
			audio_pkt_size -= len1;

			if (got_frame)
			{
				int64_t pts = frame->best_effort_timestamp;

				if(seeking)
				{
					AudioFrame audioFrame = {
						0,
						audio_channels,
						pts,
						VuoFfmpegUtility::AvTimeToSecond(container.audioStream, pts),
						NULL
					};

					audioFrames.Add(audioFrame);

					av_frame_free(&frame);
					return true;
				}

				lastDecodedAudioPts = pts;

				// convert frame data to double planar
				uint8_t **dst_data;
				int dst_linesize;

				// figure out how many samples should come out of swr_convert
				int dst_nb_samples = av_rescale_rnd(swr_get_delay(container.swr_ctx, container.audioCodecCtx->sample_rate) +
					frame->nb_samples, VuoAudioSamples_sampleRate, container.audioCodecCtx->sample_rate, AV_ROUND_UP);

				/* allocate and fill destination double* arrays */
				int ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, audio_channels, dst_nb_samples, AV_SAMPLE_FMT_DBLP, 0);

				if(ret < 0)
				{
					VUserLog("av_samples_alloc_array_and_samples error: %s", av_err2str(ret));
					free(samples);
					return false;
				}

				ret = swr_convert(container.swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)frame->data, frame->nb_samples);
				if(ret < 0) VUserLog("Failed conversion!");

				converted_sample_count = ret;

				/*
				 * For planar sample formats, each audio channel is in a separate data plane, and linesize is the
				 * buffer size, in bytes, for a single plane. All data planes must be the same size. For packed
				 * sample formats, only the first data plane is used, and samples for each channel are interleaved.
				 * In this case, linesize is the buffer size, in bytes, for the 1 plane.
				 */
				for(int i = 0; i < audio_channels; i++)
				{
					samples[i] = (uint8_t*)malloc(dst_linesize);
					memcpy(samples[i], dst_data[i], dst_linesize);
				}

				av_frame_free(&frame);

				if (dst_data)
				{
					av_freep(&dst_data[0]);
					av_freep(&dst_data);
				}

				AudioFrame audioFrame = {
					(unsigned int)(converted_sample_count * sizeof(double)),
					audio_channels,
					pts,
					VuoFfmpegUtility::AvTimeToSecond(container.audioStream, pts),
					samples
				};

				audioFrames.Add(audioFrame);

				return true;
			}
		}

		if(audio_pkt_data != NULL)
			av_packet_unref(&audio_packet);

		while( !audioPackets.Shift(&audio_packet) )
		{
			if(!NextPacket())
			{
				audio_pkt_data = NULL;
				audio_pkt_size = 0;
				return false;
			}
		}

		audio_pkt_data = audio_packet.data;
		audio_pkt_size = audio_packet.size;
	}
}

bool VuoFfmpegDecoder::SeekToSecond(double second, VuoVideoFrame *frame)
{
	// Convert second to stream time
	int64_t pts = VuoFfmpegUtility::SecondToAvTime(container.videoStream, fmax(second, 0));
	SeekToPts(pts, frame);
	return true;
}

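// Flushes the packet and frame queues, seeks to the keyframe at or before `pts`, then steps forward until reaching `pts`.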
void VuoFfmpegDecoder::SeekToPts(int64_t pts, VuoVideoFrame *frame)
{
	int64_t target_pts = pts;

	// flush queues
	videoPackets.Clear();
	videoFrames.Clear();

	avcodec_flush_buffers(container.videoCodecCtx);

	if(ContainsAudio())
	{
		ClearAudioBuffer();
		audioPackets.Clear();
		audioFrames.Clear();
		avcodec_flush_buffers(container.audioCodecCtx);
	}

	// seek video & audio
	int ret = 0;
	if (container.formatCtx->iformat->flags & AVFMT_NOFILE)
	{
		if (!showedSeekIgnoredWarning)
		{
			VUserLog("Warning: Ignoring seeks, since this is a stream (not a file).");
			showedSeekIgnoredWarning = true;
		}
	}
	else
		ret = av_seek_frame(container.formatCtx, container.videoStreamIndex, target_pts, AVSEEK_FLAG_BACKWARD);

	if(ret < 0)
		VDebugLog("Warning: av_seek_frame() failed: %s", av_err2str(ret));

//	seeking = true;

	// before seeking, set a "best guess" timestamp so that if the seek was to the end of video
	// and no stepping is required, the timestamp is still (somewhat) accurate
	lastVideoTimestamp = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, target_pts);
	lastSentVideoPts = target_pts;

	// step video and audio til the frame timestamp matches pts
	if (!StepVideoFrame(pts, frame))
		VUserLog("Warning: Couldn't seek video.");

	if(ContainsAudio())
	{
		int64_t audioPts = av_rescale_q(pts, container.videoStream->time_base, container.audioStream->time_base);

		if( audioIsEnabled )
			if (!StepAudioFrame(audioPts))
				VUserLog("Warning: Couldn't seek audio.");
	}

//	seeking = false;
}

bool VuoFfmpegDecoder::ContainsAudio()
{
	return audio_channels > 0;
}

double VuoFfmpegDecoder::GetDuration()
{
	int64_t duration = container.videoInfo.duration;

	if(duration == AV_NOPTS_VALUE)
	{
		if(container.videoInfo.last_pts == AV_NOPTS_VALUE)
		{
			if (container.formatCtx->iformat->flags & AVFMT_NOFILE)
			{
				// For streaming video sources (e.g., RTSP), just use the reported duration,
				// rather than seeking to the end (which could take a long time).
				container.videoInfo.duration = container.formatCtx->duration;
				return VuoFfmpegUtility::AvTimeToSecond(container.videoStream, container.formatCtx->duration);
			}
			else
			{
				// need to manually run through video til end to get last pts value
				seeking = true;
				if (!StepVideoFrame(INT64_MAX, NULL))
					VUserLog("Warning: Couldn't seek to end of video.");
				seeking = false;
				container.videoInfo.duration = container.videoInfo.last_pts - container.videoInfo.first_pts;
			}
		}

		return VuoFfmpegUtility::AvTimeToSecond(container.videoStream, container.videoInfo.last_pts) - VuoFfmpegUtility::AvTimeToSecond(container.videoStream, container.videoInfo.first_pts);
	}
	else
	{
		double u = VuoFfmpegUtility::AvTimeToSecond(container.videoStream, container.videoInfo.duration);
		return u;
	}
}

void VuoFfmpegDecoder::SetPlaybackRate(double rate)
{
	bool audioWasEnabled = audioIsEnabled;
	audioIsEnabled = (ContainsAudio() && fabs(rate - 1.) < .00001);

	if( (!audioWasEnabled && audioIsEnabled) || rate > 0 != mPlaybackRate > 0 )
	{
		mPlaybackRate = rate;
		SeekToSecond(lastVideoTimestamp, NULL);
	}
	else
	{
		mPlaybackRate = rate;
	}
}

double VuoFfmpegDecoder::GetLastDecodedVideoTimeStamp()
{
	return lastVideoTimestamp;
}

double VuoFfmpegDecoder::GetFrameRate()
{
	return av_q2d(container.videoStream->avg_frame_rate);
}
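
// Illustrative usage sketch (not part of the original file); it assumes only the types and
// methods defined above, plus a hypothetical movie path:
//
//     VuoUrl url = VuoText_make("file:///tmp/example.mov");
//     VuoLocal(url);
//     if (VuoFfmpegDecoder *decoder = VuoFfmpegDecoder::Create(url))
//     {
//         VuoVideoFrame frame;
//         while (decoder->NextVideoFrame(&frame))   // decode frames in order until none remain
//             VuoRelease(frame.image);              // balance the retain added when the frame was queued
//         delete decoder;
//     }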