pytorch · jdsgomes · Mar 25, 2022 · Mar 18, 2022 · Mar 18, 2022 · Mar 21, 2022
diff --git a/torchvision/csrc/io/decoder/decoder.cpp b/torchvision/csrc/io/decoder/decoder.cpp
@@ -18,25 +18,6 @@ constexpr size_t kIoBufferSize = 96 * 1024;
 constexpr size_t kIoPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
 constexpr size_t kLogBufferSize = 1024;
 
-int ffmpeg_lock(void** mutex, enum AVLockOp op) {
-  std::mutex** handle = (std::mutex**)mutex;
-  switch (op) {
-    case AV_LOCK_CREATE:
-      *handle = new std::mutex();
-      break;
-    case AV_LOCK_OBTAIN:
-      (*handle)->lock();
-      break;
-    case AV_LOCK_RELEASE:
-      (*handle)->unlock();
-      break;
-    case AV_LOCK_DESTROY:
-      delete *handle;
-      break;
-  }
-  return 0;
-}
-
 bool mapFfmpegType(AVMediaType media, MediaType* type) {
   switch (media) {
     case AVMEDIA_TYPE_AUDIO:
@@ -202,8 +183,6 @@ void Decoder::initOnce() {
     avcodec_register_all();
 #endif
     avformat_network_init();
-    // register ffmpeg lock manager
-    av_lockmgr_register(&ffmpeg_lock);
     av_log_set_callback(Decoder::logFunction);
     av_log_set_level(AV_LOG_ERROR);
     VLOG(1) << "Registered ffmpeg libs";
@@ -277,7 +256,7 @@ bool Decoder::init(
           break;
       }
 
-      fmt = av_find_input_format(fmtName);
+      fmt = (AVInputFormat*)av_find_input_format(fmtName);
     }
 
     const size_t avioCtxBufferSize = kIoBufferSize;
@@ -505,10 +484,14 @@ int Decoder::getFrame(size_t workingTimeInMs) {
   // once decode() method gets called and grab some bytes
   // run this method again
   // init package
-  AVPacket avPacket;
-  av_init_packet(&avPacket);
-  avPacket.data = nullptr;
-  avPacket.size = 0;
+  // update 03/22: moving memory management to ffmpeg
+  AVPacket* avPacket;
+  avPacket = av_packet_alloc();
+  if (avPacket == NULL) {
+    LOG(ERROR) << "decoder as not able to allocate the packet.";
+  }
+  avPacket->data = nullptr;
+  avPacket->size = 0;
 
   auto end = std::chrono::steady_clock::now() +
       std::chrono::milliseconds(workingTimeInMs);
@@ -521,7 +504,7 @@ int Decoder::getFrame(size_t workingTimeInMs) {
   size_t decodingErrors = 0;
   bool decodedFrame = false;
   while (!interrupted_ && inRange_.any() && !decodedFrame && watcher()) {
-    result = av_read_frame(inputCtx_, &avPacket);
+    result = av_read_frame(inputCtx_, avPacket);
     if (result == AVERROR(EAGAIN)) {
       VLOG(4) << "Decoder is busy...";
       std::this_thread::yield();
@@ -538,10 +521,11 @@ int Decoder::getFrame(size_t workingTimeInMs) {
       break;
     }
 
-    // get stream
-    auto stream = findByIndex(avPacket.stream_index);
+    // get stream; if stream cannot be found reset the packet to
+    // default settings
+    auto stream = findByIndex(avPacket->stream_index);
     if (stream == nullptr || !inRange_.test(stream->getIndex())) {
-      av_packet_unref(&avPacket);
+      av_packet_unref(avPacket);
       continue;
     }
 
@@ -553,7 +537,7 @@ int Decoder::getFrame(size_t workingTimeInMs) {
       bool hasMsg = false;
       // packet either got consumed completely or not at all
       if ((result = processPacket(
-               stream, &avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) {
+               stream, avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) {
         LOG(ERROR) << "processPacket failed with code: " << result;
         break;
       }
@@ -585,10 +569,10 @@ int Decoder::getFrame(size_t workingTimeInMs) {
 
     result = 0;
 
-    av_packet_unref(&avPacket);
+    av_packet_unref(avPacket);
   }
 
-  av_packet_unref(&avPacket);
+  av_packet_unref(avPacket);
 
   VLOG(2) << "Interrupted loop"
           << ", interrupted_ " << interrupted_ << ", inRange_.any() "

diff --git a/torchvision/csrc/io/decoder/stream.cpp b/torchvision/csrc/io/decoder/stream.cpp
@@ -28,7 +28,7 @@ Stream::~Stream() {
 
 // look up the proper CODEC querying the function
 AVCodec* Stream::findCodec(AVCodecParameters* params) {
-  return avcodec_find_decoder(params->codec_id);
+  return (AVCodec*)avcodec_find_decoder(params->codec_id);
 }
 
 // Allocate memory for the AVCodecContext, which will hold the context for

diff --git a/torchvision/csrc/io/decoder/subtitle_stream.cpp b/torchvision/csrc/io/decoder/subtitle_stream.cpp
@@ -44,20 +44,26 @@ int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
   // clean-up
   releaseSubtitle();
   // check flush packet
-  AVPacket avPacket;
-  av_init_packet(&avPacket);
-  avPacket.data = nullptr;
-  avPacket.size = 0;
-  auto pkt = packet ? *packet : avPacket;
+  AVPacket* avPacket;
+  avPacket = av_packet_alloc();
+  if (avPacket == NULL) {
+    LOG(ERROR) << "decoder as not able to allocate the packet.";
+  }
+  avPacket->data = nullptr;
+  avPacket->size = 0;
+
+  auto pkt = packet ? packet : avPacket;
   int gotFramePtr = 0;
-  int result = avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, &pkt);
+  // is these a better way than cast from const?
+  int result =
+      avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, (AVPacket*)pkt);
 
   if (result < 0) {
     LOG(ERROR) << "avcodec_decode_subtitle2 failed, err: "
                << Util::generateErrorDesc(result);
     return result;
   } else if (result == 0) {
-    result = pkt.size; // discard the rest of the package
+    result = pkt->size; // discard the rest of the package
   }
 
   sub_.release = gotFramePtr;
@@ -66,7 +72,7 @@ int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
   // set proper pts in us
   if (gotFramePtr) {
     sub_.pts = av_rescale_q(
-        pkt.pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
+        pkt->pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
   }
 
   return result;