pytorch · jdsgomes · Mar 25, 2022 · Mar 18, 2022 · Mar 18, 2022 · Mar 21, 2022
diff --git a/torchvision/csrc/io/decoder/decoder.cpp b/torchvision/csrc/io/decoder/decoder.cpp
@@ -1,6 +1,7 @@
 #include "decoder.h"
 #include <c10/util/Logging.h>
 #include <libavutil/avutil.h>
+#include <cerrno>
 #include <future>
 #include <iostream>
 #include <mutex>
@@ -18,25 +19,6 @@ constexpr size_t kIoBufferSize = 96 * 1024;
 constexpr size_t kIoPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
 constexpr size_t kLogBufferSize = 1024;
 
-int ffmpeg_lock(void** mutex, enum AVLockOp op) {
-  std::mutex** handle = (std::mutex**)mutex;
-  switch (op) {
-    case AV_LOCK_CREATE:
-      *handle = new std::mutex();
-      break;
-    case AV_LOCK_OBTAIN:
-      (*handle)->lock();
-      break;
-    case AV_LOCK_RELEASE:
-      (*handle)->unlock();
-      break;
-    case AV_LOCK_DESTROY:
-      delete *handle;
-      break;
-  }
-  return 0;
-}
-
 bool mapFfmpegType(AVMediaType media, MediaType* type) {
   switch (media) {
     case AVMEDIA_TYPE_AUDIO:
@@ -202,8 +184,6 @@ void Decoder::initOnce() {
     avcodec_register_all();
 #endif
     avformat_network_init();
-    // register ffmpeg lock manager
-    av_lockmgr_register(&ffmpeg_lock);
     av_log_set_callback(Decoder::logFunction);
     av_log_set_level(AV_LOG_ERROR);
     VLOG(1) << "Registered ffmpeg libs";
@@ -277,7 +257,7 @@ bool Decoder::init(
           break;
       }
 
-      fmt = av_find_input_format(fmtName);
+      fmt = const_cast<AVInputFormat*>(av_find_input_format(fmtName));
     }
 
     const size_t avioCtxBufferSize = kIoBufferSize;
@@ -505,10 +485,15 @@ int Decoder::getFrame(size_t workingTimeInMs) {
   // once decode() method gets called and grab some bytes
   // run this method again
   // init package
-  AVPacket avPacket;
-  av_init_packet(&avPacket);
-  avPacket.data = nullptr;
-  avPacket.size = 0;
+  // update 03/22: moving memory management to ffmpeg
+  // (released with av_packet_free() once the read loop below is done)
+  AVPacket* avPacket = av_packet_alloc();
+  if (avPacket == nullptr) {
+    LOG(ERROR) << "decoder was not able to allocate the packet.";
+    return AVERROR(ENOMEM);
+  }
+  avPacket->data = nullptr;
+  avPacket->size = 0;
 
   auto end = std::chrono::steady_clock::now() +
       std::chrono::milliseconds(workingTimeInMs);
@@ -521,7 +506,7 @@ int Decoder::getFrame(size_t workingTimeInMs) {
   size_t decodingErrors = 0;
   bool decodedFrame = false;
   while (!interrupted_ && inRange_.any() && !decodedFrame && watcher()) {
-    result = av_read_frame(inputCtx_, &avPacket);
+    result = av_read_frame(inputCtx_, avPacket);
     if (result == AVERROR(EAGAIN)) {
       VLOG(4) << "Decoder is busy...";
       std::this_thread::yield();
@@ -538,10 +523,11 @@ int Decoder::getFrame(size_t workingTimeInMs) {
       break;
     }
 
-    // get stream
-    auto stream = findByIndex(avPacket.stream_index);
+    // get stream; if stream cannot be found reset the packet to
+    // default settings
+    auto stream = findByIndex(avPacket->stream_index);
     if (stream == nullptr || !inRange_.test(stream->getIndex())) {
-      av_packet_unref(&avPacket);
+      av_packet_unref(avPacket);
       continue;
     }
 
@@ -553,7 +539,7 @@ int Decoder::getFrame(size_t workingTimeInMs) {
       bool hasMsg = false;
       // packet either got consumed completely or not at all
       if ((result = processPacket(
-               stream, &avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) {
+               stream, avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) {
         LOG(ERROR) << "processPacket failed with code: " << result;
         break;
       }
@@ -585,11 +571,10 @@ int Decoder::getFrame(size_t workingTimeInMs) {
 
     result = 0;
 
-    av_packet_unref(&avPacket);
+    av_packet_unref(avPacket);
   }
 
-  av_packet_unref(&avPacket);
-
+  av_packet_free(&avPacket);
   VLOG(2) << "Interrupted loop"
           << ", interrupted_ " << interrupted_ << ", inRange_.any() "
           << inRange_.any() << ", decodedFrame " << decodedFrame << ", result "

diff --git a/torchvision/csrc/io/decoder/stream.cpp b/torchvision/csrc/io/decoder/stream.cpp
@@ -28,7 +28,9 @@ Stream::~Stream() {
 
 // look up the proper CODEC querying the function
 AVCodec* Stream::findCodec(AVCodecParameters* params) {
-  return avcodec_find_decoder(params->codec_id);
+  // Newer FFmpeg releases return `const AVCodec*` here; const_cast builds
+  // against both the const and the non-const signature.
+  return const_cast<AVCodec*>(avcodec_find_decoder(params->codec_id));
 }
 
 // Allocate memory for the AVCodecContext, which will hold the context for

diff --git a/torchvision/csrc/io/decoder/subtitle_stream.cpp b/torchvision/csrc/io/decoder/subtitle_stream.cpp
@@ -43,21 +43,34 @@ int SubtitleStream::initFormat() {
 int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
   // clean-up
   releaseSubtitle();
+
+  // Only a flush call (packet == nullptr) needs a scratch packet;
+  // av_packet_alloc() already returns it with data == nullptr and size == 0.
+  AVPacket* avPacket = packet ? nullptr : av_packet_alloc();
+  if (packet == nullptr && avPacket == nullptr) {
+    LOG(ERROR)
+        << "decoder was not able to allocate the subtitle-specific packet.";
+    // allocation failure: use the same code Decoder::getFrame reports
+    return AVERROR(ENOMEM);
+  }
+  // av_packet_free() is a no-op on a null pointer, so the frees below
+  // remain correct when the caller supplied the packet.
   // check flush packet
-  AVPacket avPacket;
-  av_init_packet(&avPacket);
-  avPacket.data = nullptr;
-  avPacket.size = 0;
-  auto pkt = packet ? *packet : avPacket;
+  auto pkt = packet ? packet : avPacket;
+
   int gotFramePtr = 0;
-  int result = avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, &pkt);
+  // avcodec_decode_subtitle2 may take a non-const AVPacket*: drop the const
+  int result = avcodec_decode_subtitle2(
+      codecCtx_, &sub_, &gotFramePtr, const_cast<AVPacket*>(pkt));
 
   if (result < 0) {
     LOG(ERROR) << "avcodec_decode_subtitle2 failed, err: "
                << Util::generateErrorDesc(result);
+    // free the scratch packet, if one was created
+    av_packet_free(&avPacket);
     return result;
   } else if (result == 0) {
-    result = pkt.size; // discard the rest of the package
+    result = pkt->size; // discard the rest of the package
   }
 
   sub_.release = gotFramePtr;
@@ -66,9 +79,10 @@ int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
   // set proper pts in us
   if (gotFramePtr) {
     sub_.pts = av_rescale_q(
-        pkt.pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
+        pkt->pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
   }
 
+  av_packet_free(&avPacket);
   return result;
 }