diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index f20cafc5520c3a1d4c4799cfbc7aedbf3532c303..08fad3ecd53e55372f255c69a5674726a6ef28de 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -2296,14 +2296,14 @@ fail:
     return AVERROR_INVALIDDATA;
 }
 
-static int add_metadata_from_side_data(AVCodecContext *avctx, AVFrame *frame)
+static int add_metadata_from_side_data(AVPacket *avpkt, AVFrame *frame)
 {
     int size;
     const uint8_t *side_metadata;
 
     AVDictionary **frame_md = avpriv_frame_get_metadatap(frame);
 
-    side_metadata = av_packet_get_side_data(avctx->internal->pkt,
+    side_metadata = av_packet_get_side_data(avpkt,
                                             AV_PKT_DATA_STRINGS_METADATA, &size);
     return av_packet_unpack_dictionary(side_metadata, size, frame_md);
 }
@@ -2406,7 +2406,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
                 if (picture->format == AV_PIX_FMT_NONE)   picture->format              = avctx->pix_fmt;
             }
         }
-        add_metadata_from_side_data(avctx, picture);
+        add_metadata_from_side_data(avctx->internal->pkt, picture);
 
 fail:
         emms_c(); //needed to avoid an emms_c() call before every return;
@@ -2548,7 +2548,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             frame->pkt_dts = avpkt->dts;
         }
         if (ret >= 0 && *got_frame_ptr) {
-            add_metadata_from_side_data(avctx, frame);
+            add_metadata_from_side_data(avctx->internal->pkt, frame);
             avctx->frame_number++;
             av_frame_set_best_effort_timestamp(frame,
                                                guess_correct_pts(avctx,