diff --git a/libavfilter/fifo.c b/libavfilter/fifo.c
index ae464036c6e0ba5cf62abb0d61a6578659f0fa5c..8d981cef965e26a7e568e8097ff5e2af776b97bb 100644
--- a/libavfilter/fifo.c
+++ b/libavfilter/fifo.c
@@ -183,8 +183,25 @@ static int return_audio_frame(AVFilterContext *ctx)
         }
 
         while (s->out->nb_samples < s->allocated_samples) {
-            int len = FFMIN(s->allocated_samples - s->out->nb_samples,
-                            head->nb_samples);
+            int len;
+
+            if (!s->root.next) {
+                ret = ff_request_frame(ctx->inputs[0]);
+                if (ret == AVERROR_EOF) {
+                    av_samples_set_silence(s->out->extended_data,
+                                           s->out->nb_samples,
+                                           s->allocated_samples -
+                                           s->out->nb_samples,
+                                           nb_channels, link->format);
+                    s->out->nb_samples = s->allocated_samples;
+                    break;
+                } else if (ret < 0)
+                    return ret;
+            }
+            head = s->root.next->frame;
+
+            len = FFMIN(s->allocated_samples - s->out->nb_samples,
+                        head->nb_samples);
 
             av_samples_copy(s->out->extended_data, head->extended_data,
                             s->out->nb_samples, 0, len, nb_channels,
@@ -194,21 +211,6 @@ static int return_audio_frame(AVFilterContext *ctx)
             if (len == head->nb_samples) {
                 av_frame_free(&head);
                 queue_pop(s);
-
-                if (!s->root.next &&
-                    (ret = ff_request_frame(ctx->inputs[0])) < 0) {
-                    if (ret == AVERROR_EOF) {
-                        av_samples_set_silence(s->out->extended_data,
-                                               s->out->nb_samples,
-                                               s->allocated_samples -
-                                               s->out->nb_samples,
-                                               nb_channels, link->format);
-                        s->out->nb_samples = s->allocated_samples;
-                        break;
-                    }
-                    return ret;
-                }
-                head = s->root.next->frame;
             } else {
                 buffer_offset(link, head, len);
             }
diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak
index 63293d97b2a2c1a6e9096a56ababc75d447526be..7f1a056f9a16fe23aaef383d8c9a6de23e7ebb9c 100644
--- a/tests/fate/filter-audio.mak
+++ b/tests/fate/filter-audio.mak
@@ -45,7 +45,7 @@ fate-filter-join: SRC2 = $(TARGET_PATH)/tests/data/asynth-44100-3.wav
 fate-filter-join: tests/data/asynth-44100-2.wav tests/data/asynth-44100-3.wav
 fate-filter-join: CMD = md5 -i $(SRC1) -i $(SRC2) -filter_complex join=channel_layout=5 -f s16le
 fate-filter-join: CMP = oneline
-fate-filter-join: REF = 38fa1b18b0c46d77df6f17bfc4f078dd
+fate-filter-join: REF = 409e66fc5ece4ddea4aa16bc47026bb0
 
 FATE_AFILTER-$(call FILTERDEMDECENCMUX, VOLUME, WAV, PCM_S16LE, PCM_S16LE, PCM_S16LE) += fate-filter-volume
 fate-filter-volume: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav