diff --git a/audio/decode/ad.h b/audio/decode/ad.h
index 2ac9fb21f6..e09ded2efc 100644
--- a/audio/decode/ad.h
+++ b/audio/decode/ad.h
@@ -36,7 +36,7 @@ struct ad_functions {
     int (*init)(struct dec_audio *da, const char *decoder);
     void (*uninit)(struct dec_audio *da);
     int (*control)(struct dec_audio *da, int cmd, void *arg);
-    int (*decode_audio)(struct dec_audio *da, struct mp_audio *buffer, int maxlen);
+    int (*decode_packet)(struct dec_audio *da);
 };
 
 enum ad_ctrl {
diff --git a/audio/decode/ad_lavc.c b/audio/decode/ad_lavc.c
index 4d892da32b..cb8cfa8c82 100644
--- a/audio/decode/ad_lavc.c
+++ b/audio/decode/ad_lavc.c
@@ -49,7 +49,6 @@ struct priv {
 };
 
 static void uninit(struct dec_audio *da);
-static int decode_new_packet(struct dec_audio *da);
 
 #define OPT_BASE_STRUCT struct ad_lavc_params
 struct ad_lavc_params {
@@ -143,11 +142,10 @@ static const char *find_pcm_decoder(const struct pcm_map *map, int format,
     return NULL;
 }
 
-static int setup_format(struct dec_audio *da)
+static void set_data_from_avframe(struct dec_audio *da)
 {
     struct priv *priv = da->priv;
     AVCodecContext *lavc_context = priv->avctx;
-    struct sh_audio *sh_audio = da->header->audio;
 
     // Note: invalid parameters are rejected by dec_audio.c
 
@@ -164,12 +162,15 @@ static int setup_format(struct dec_audio *da)
     if (lavc_chmap.num != lavc_context->channels)
         mp_chmap_from_channels(&lavc_chmap, lavc_context->channels);
     if (priv->force_channel_map) {
+        struct sh_audio *sh_audio = da->header->audio;
         if (lavc_chmap.num == sh_audio->channels.num)
             lavc_chmap = sh_audio->channels;
     }
     mp_audio_set_channels(&da->decoded, &lavc_chmap);
 
-    return 0;
+    da->decoded.samples = priv->avframe->nb_samples;
+    for (int n = 0; n < da->decoded.num_planes; n++)
+        da->decoded.planes[n] = priv->avframe->data[n];
 }
 
 static void set_from_wf(AVCodecContext *avctx, MP_WAVEFORMATEX *wf)
@@ -261,22 +262,6 @@ static int init(struct dec_audio *da, const char *decoder)
         uninit(da);
         return 0;
     }
-    MP_VERBOSE(da, "INFO: libavcodec \"%s\" init OK!\n",
-           lavc_codec->name);
-
-    // Decode at least 1 sample:  (to get header filled)
-    for (int tries = 1; ; tries++) {
-        int x = decode_new_packet(da);
-        if (x >= 0 && ctx->frame.samples > 0) {
-            MP_VERBOSE(da, "Initial decode succeeded after %d packets.\n", tries);
-            break;
-        }
-        if (tries >= 50) {
-            MP_ERR(da, "initial decode failed\n");
-            uninit(da);
-            return 0;
-        }
-    }
 
     if (lavc_context->bit_rate != 0)
         da->bitrate = lavc_context->bit_rate;
@@ -308,7 +293,7 @@ static int control(struct dec_audio *da, int cmd, void *arg)
     switch (cmd) {
     case ADCTRL_RESET:
         avcodec_flush_buffers(ctx->avctx);
-        ctx->frame.samples = 0;
+        mp_audio_set_null_data(&da->decoded);
         talloc_free(ctx->packet);
         ctx->packet = NULL;
         return CONTROL_TRUE;
@@ -316,12 +301,12 @@ static int control(struct dec_audio *da, int cmd, void *arg)
     return CONTROL_UNKNOWN;
 }
 
-static int decode_new_packet(struct dec_audio *da)
+static int decode_packet(struct dec_audio *da)
 {
     struct priv *priv = da->priv;
     AVCodecContext *avctx = priv->avctx;
 
-    priv->frame.samples = 0;
+    mp_audio_set_null_data(&da->decoded);
 
     struct demux_packet *mpkt = priv->packet;
     if (!mpkt)
@@ -361,19 +346,13 @@ static int decode_new_packet(struct dec_audio *da)
             return 0;
     }
     if (ret < 0) {
-        MP_VERBOSE(da, "lavc_audio: error\n");
-        return -1;
+        MP_ERR(da, "Error decoding audio.\n");
+        return AD_ERR;
     }
     if (!got_frame)
-        return mpkt ? 0 : -1; // -1: eof
+        return mpkt ? AD_OK : AD_EOF;
 
-    if (setup_format(da) < 0)
-        return -1;
-
-    priv->frame.samples = priv->avframe->nb_samples;
-    mp_audio_copy_config(&priv->frame, &da->decoded);
-    for (int n = 0; n < priv->frame.num_planes; n++)
-        priv->frame.planes[n] = priv->avframe->data[n];
+    set_data_from_avframe(da);
 
     double out_pts = mp_pts_from_av(priv->avframe->pkt_pts, NULL);
     if (out_pts != MP_NOPTS_VALUE) {
@@ -381,27 +360,7 @@ static int decode_new_packet(struct dec_audio *da)
         da->pts_offset = 0;
     }
 
-    MP_DBG(da, "Decoded %d -> %d samples\n", in_len,
-           priv->frame.samples);
-    return 0;
-}
-
-static int decode_audio(struct dec_audio *da, struct mp_audio *buffer, int maxlen)
-{
-    struct priv *priv = da->priv;
-
-    if (!priv->frame.samples) {
-        if (decode_new_packet(da) < 0)
-            return AD_ERR;
-    }
-
-    if (!mp_audio_config_equals(buffer, &priv->frame))
-        return 0;
-
-    buffer->samples = MPMIN(priv->frame.samples, maxlen);
-    mp_audio_copy(buffer, 0, &priv->frame, 0, buffer->samples);
-    mp_audio_skip_samples(&priv->frame, buffer->samples);
-    da->pts_offset += buffer->samples;
+    MP_DBG(da, "Decoded %d -> %d samples\n", in_len, da->decoded.samples);
     return 0;
 }
 
@@ -418,5 +377,5 @@ const struct ad_functions ad_lavc = {
     .init = init,
     .uninit = uninit,
     .control = control,
-    .decode_audio = decode_audio,
+    .decode_packet = decode_packet,
 };
diff --git a/audio/decode/ad_mpg123.c b/audio/decode/ad_mpg123.c
index 055285cccd..f96a5a8036 100644
--- a/audio/decode/ad_mpg123.c
+++ b/audio/decode/ad_mpg123.c
@@ -35,9 +35,7 @@
 
 struct ad_mpg123_context {
     mpg123_handle *handle;
-    bool new_format;
     int sample_size;
-    bool need_data;
     /* Running mean for bit rate, stream length estimation. */
     float mean_rate;
     unsigned int mean_count;
@@ -58,7 +56,7 @@ static void uninit(struct dec_audio *da)
 
 /* This initializes libmpg123 and prepares the handle, including funky
  * parameters. */
-static int preinit(struct dec_audio *da)
+static int init(struct dec_audio *da, const char *decoder)
 {
     int err;
     struct ad_mpg123_context *con;
@@ -111,15 +109,18 @@ static int preinit(struct dec_audio *da)
      * We need at least 1152 samples. dec_audio.c normally guarantees this. */
     mpg123_param(con->handle, MPG123_REMOVE_FLAGS, MPG123_AUTO_RESAMPLE, 0.);
 
+    err = mpg123_open_feed(con->handle);
+    if (err != MPG123_OK)
+        goto bad_end;
+
     return 1;
 
   bad_end:
-    if (!con->handle)
-        MP_ERR(da, "mpg123 preinit error: %s\n",
-               mpg123_plain_strerror(err));
-    else
-        MP_ERR(da, "mpg123 preinit error: %s\n",
-               mpg123_strerror(con->handle));
+    if (!con->handle) {
+        MP_ERR(da, "mpg123 preinit error: %s\n", mpg123_plain_strerror(err));
+    } else {
+        MP_ERR(da, "mpg123 preinit error: %s\n", mpg123_strerror(con->handle));
+    }
 
     uninit(da);
     return 0;
@@ -140,111 +141,6 @@ static int mpg123_format_to_af(int mpg123_encoding)
     return 0;
 }
 
-/* libmpg123 has a new format ready; query and store, return return value
-   of mpg123_getformat() */
-static int set_format(struct dec_audio *da)
-{
-    struct ad_mpg123_context *con = da->priv;
-    int ret;
-    long rate;
-    int channels;
-    int encoding;
-    ret = mpg123_getformat(con->handle, &rate, &channels, &encoding);
-    if (ret == MPG123_OK) {
-        mp_audio_set_num_channels(&da->decoded, channels);
-        da->decoded.rate = rate;
-        int af = mpg123_format_to_af(encoding);
-        if (!af) {
-            /* This means we got a funny custom build of libmpg123 that only supports an unknown format. */
-            MP_ERR(da, "Bad encoding from mpg123: %i.\n", encoding);
-            return MPG123_ERR;
-        }
-        mp_audio_set_format(&da->decoded, af);
-        con->sample_size = channels * af_fmt2bps(af);
-        con->new_format = 0;
-    }
-    return ret;
-}
-
-static int feed_new_packet(struct dec_audio *da)
-{
-    struct ad_mpg123_context *con = da->priv;
-    int ret;
-
-    struct demux_packet *pkt = demux_read_packet(da->header);
-    if (!pkt)
-        return -1; /* EOF. */
-
-    /* Next bytes from that presentation time. */
-    if (pkt->pts != MP_NOPTS_VALUE) {
-        da->pts        = pkt->pts;
-        da->pts_offset = 0;
-    }
-
-    /* Have to use mpg123_feed() to avoid decoding here. */
-    ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
-    talloc_free(pkt);
-
-    if (ret == MPG123_ERR)
-        return -1;
-
-    if (ret == MPG123_NEW_FORMAT)
-        con->new_format = 1;
-
-    return 0;
-}
-
-/* Now we really start accessing some data and determining file format.
- * Format now is allowed to change on-the-fly. Here is the only point
- * that has MPlayer react to errors. We have to pray that exceptional
- * erros in other places simply cannot occur. */
-static int init(struct dec_audio *da, const char *decoder)
-{
-    if (!preinit(da))
-        return 0;
-
-    struct ad_mpg123_context *con = da->priv;
-    int ret;
-
-    ret = mpg123_open_feed(con->handle);
-    if (ret != MPG123_OK)
-        goto fail;
-
-    for (int n = 0; ; n++) {
-        if (feed_new_packet(da) < 0) {
-            ret = MPG123_NEED_MORE;
-            goto fail;
-        }
-        size_t got_now = 0;
-        ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
-        if (ret == MPG123_OK || ret == MPG123_NEW_FORMAT) {
-            ret = set_format(da);
-            if (ret == MPG123_OK)
-                break;
-        }
-        if (ret != MPG123_NEED_MORE)
-            goto fail;
-        // max. 16 retries (randomly chosen number)
-        if (n > 16) {
-            ret = MPG123_NEED_MORE;
-            goto fail;
-        }
-    }
-
-    return 1;
-
-fail:
-    if (ret == MPG123_NEED_MORE) {
-        MP_ERR(da, "Could not find mp3 stream.\n");
-    } else {
-        MP_ERR(da, "mpg123 init error: %s\n",
-               mpg123_strerror(con->handle));
-    }
-
-    uninit(da);
-    return 0;
-}
-
 /* Compute bitrate from frame size. */
 static int compute_bitrate(struct mpg123_frameinfo *i)
 {
@@ -290,50 +186,79 @@ static void update_info(struct dec_audio *da)
     }
 }
 
-static int decode_audio(struct dec_audio *da, struct mp_audio *buffer, int maxlen)
+/* Query libmpg123's current output format and store it in da->decoded.
+   Returns mpg123_getformat()'s result, or MPG123_ERR on unknown encoding. */
+static int set_format(struct dec_audio *da)
+{
+    struct ad_mpg123_context *con = da->priv;
+    int ret;
+    long rate;
+    int channels;
+    int encoding;
+    ret = mpg123_getformat(con->handle, &rate, &channels, &encoding);
+    if (ret == MPG123_OK) {
+        mp_audio_set_num_channels(&da->decoded, channels);
+        da->decoded.rate = rate;
+        int af = mpg123_format_to_af(encoding);
+        if (!af) {
+            /* This means we got a funny custom build of libmpg123 that only supports an unknown format. */
+            MP_ERR(da, "Bad encoding from mpg123: %i.\n", encoding);
+            return MPG123_ERR;
+        }
+        mp_audio_set_format(&da->decoded, af);
+        con->sample_size = channels * af_fmt2bps(af);
+    }
+    return ret;
+}
+
+static int decode_packet(struct dec_audio *da)
 {
     struct ad_mpg123_context *con = da->priv;
-    void *buf = buffer->planes[0];
     int ret;
 
-    if (con->new_format) {
-        ret = set_format(da);
-        if (ret == MPG123_OK) {
-            return 0; // let caller handle format change
-        } else if (ret == MPG123_NEED_MORE) {
-            con->need_data = true;
-        } else {
-            goto mpg123_fail;
-        }
+    mp_audio_set_null_data(&da->decoded);
+
+    struct demux_packet *pkt = demux_read_packet(da->header);
+    if (!pkt)
+        return AD_EOF;
+
+    /* The data fed below starts at this packet's presentation time. */
+    if (pkt->pts != MP_NOPTS_VALUE) {
+        da->pts        = pkt->pts;
+        da->pts_offset = 0;
     }
 
-    if (con->need_data) {
-        if (feed_new_packet(da) < 0)
-            return AD_ERR;
-    }
+    /* Have to use mpg123_feed() to avoid decoding here. */
+    ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
+    talloc_free(pkt);
 
-    if (!mp_audio_config_equals(&da->decoded, buffer))
-        return 0;
-
-    size_t got_now = 0;
-    ret = mpg123_replace_buffer(con->handle, buf, maxlen * con->sample_size);
     if (ret != MPG123_OK)
         goto mpg123_fail;
 
-    ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
+    unsigned char *audio = NULL;
+    size_t bytes = 0;
+    ret = mpg123_decode_frame(con->handle, NULL, &audio, &bytes);
 
-    int got_samples = got_now / con->sample_size;
-    buffer->samples += got_samples;
-    da->pts_offset += got_samples;
+    if (ret == MPG123_NEED_MORE)
+        return 0;
 
-    if (ret == MPG123_NEW_FORMAT) {
-        con->new_format = true;
-    } else if (ret == MPG123_NEED_MORE) {
-        con->need_data = true;
-    } else if (ret != MPG123_OK && ret != MPG123_DONE) {
+    if (ret != MPG123_OK && ret != MPG123_DONE && ret != MPG123_NEW_FORMAT)
         goto mpg123_fail;
+
+    ret = set_format(da);
+    if (ret != MPG123_OK)
+        goto mpg123_fail;
+
+    if (con->sample_size < 1) {
+        MP_ERR(da, "no sample size\n");
+        return AD_ERR;
     }
 
+    int got_samples = bytes / con->sample_size;
+    da->decoded.planes[0] = audio;
+    da->decoded.samples = got_samples;
+    da->pts_offset += got_samples;
+
     update_info(da);
     return 0;
 
@@ -348,6 +273,7 @@ static int control(struct dec_audio *da, int cmd, void *arg)
 
     switch (cmd) {
     case ADCTRL_RESET:
+        mp_audio_set_null_data(&da->decoded);
         mpg123_close(con->handle);
 
         if (mpg123_open_feed(con->handle) != MPG123_OK) {
@@ -372,5 +298,5 @@ const struct ad_functions ad_mpg123 = {
     .init = init,
     .uninit = uninit,
     .control = control,
-    .decode_audio = decode_audio,
+    .decode_packet = decode_packet,
 };
diff --git a/audio/decode/ad_spdif.c b/audio/decode/ad_spdif.c
index 1042fdeb48..d655f91574 100644
--- a/audio/decode/ad_spdif.c
+++ b/audio/decode/ad_spdif.c
@@ -38,8 +38,7 @@ struct spdifContext {
     AVFormatContext *lavf_ctx;
     int              iec61937_packet_size;
     int              out_buffer_len;
-    int              out_buffer_size;
-    uint8_t         *out_buffer;
+    uint8_t          out_buffer[OUTBUF_SIZE];
     bool             need_close;
 };
 
@@ -47,7 +46,7 @@ static int write_packet(void *p, uint8_t *buf, int buf_size)
 {
     struct spdifContext *ctx = p;
 
-    int buffer_left = ctx->out_buffer_size - ctx->out_buffer_len;
+    int buffer_left = OUTBUF_SIZE - ctx->out_buffer_len;
     if (buf_size > buffer_left) {
         MP_ERR(ctx, "spdif packet too large.\n");
         buf_size = buffer_left;
@@ -183,24 +182,18 @@ fail:
     return 0;
 }
 
-static int decode_audio(struct dec_audio *da, struct mp_audio *buffer, int maxlen)
+static int decode_packet(struct dec_audio *da)
 {
     struct spdifContext *spdif_ctx = da->priv;
     AVFormatContext     *lavf_ctx  = spdif_ctx->lavf_ctx;
 
-    int sstride = 2 * da->decoded.channels.num;
-    assert(sstride == buffer->sstride);
-
-    if (maxlen * sstride < spdif_ctx->iec61937_packet_size)
-        return 0;
+    mp_audio_set_null_data(&da->decoded);
 
     spdif_ctx->out_buffer_len  = 0;
-    spdif_ctx->out_buffer_size = maxlen * sstride;
-    spdif_ctx->out_buffer      = buffer->planes[0];
 
     struct demux_packet *mpkt = demux_read_packet(da->header);
     if (!mpkt)
-        return AD_ERR;
+        return AD_EOF;
 
     AVPacket pkt;
     mp_set_av_packet(&pkt, mpkt, NULL);
@@ -211,13 +204,15 @@ static int decode_audio(struct dec_audio *da, struct mp_audio *buffer, int maxle
         da->pts_offset = 0;
     }
     int ret = av_write_frame(lavf_ctx, &pkt);
-    avio_flush(lavf_ctx->pb);
-    buffer->samples = spdif_ctx->out_buffer_len / sstride;
-    da->pts_offset += buffer->samples;
     talloc_free(mpkt);
+    avio_flush(lavf_ctx->pb);
     if (ret < 0)
         return AD_ERR;
 
+    da->decoded.planes[0] = spdif_ctx->out_buffer;
+    da->decoded.samples = spdif_ctx->out_buffer_len / da->decoded.sstride;
+    da->pts_offset += da->decoded.samples;
+
     return 0;
 }
 
@@ -253,5 +248,5 @@ const struct ad_functions ad_spdif = {
     .init = init,
     .uninit = uninit,
     .control = control,
-    .decode_audio = decode_audio,
+    .decode_packet = decode_packet,
 };
diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index 907b154bf8..c2857353fa 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -56,13 +56,6 @@ static const struct ad_functions * const ad_drivers[] = {
     NULL
 };
 
-// ad_mpg123 needs to be able to decode 1152 samples at once
-// ad_spdif needs up to 8192
-#define DECODE_MAX_UNIT MPMAX(8192, 1152)
-
-// At least 8192 samples, plus hack for ad_mpg123 and ad_spdif
-#define DECODE_BUFFER_SAMPLES (8192 + DECODE_MAX_UNIT)
-
 // Drop audio buffer and reinit it (after format change)
 // Returns whether the format was valid at all.
 static bool reinit_audio_buffer(struct dec_audio *da)
@@ -73,7 +66,6 @@ static bool reinit_audio_buffer(struct dec_audio *da)
         return false;
     }
     mp_audio_buffer_reinit(da->decode_buffer, &da->decoded);
-    mp_audio_buffer_preallocate_min(da->decode_buffer, DECODE_BUFFER_SAMPLES);
     return true;
 }
 
@@ -97,6 +89,21 @@ static int init_audio_codec(struct dec_audio *d_audio, const char *decoder)
         return 0;
     }
 
+    // Keep decoding packets until the decoder has set a valid audio format.
+    for (int tries = 1; ; tries++) {
+        if (mp_audio_config_valid(&d_audio->decoded))  {
+            MP_VERBOSE(d_audio, "Initial decode succeeded after %d packets.\n",
+                       tries);
+            break;
+        }
+        if (tries >= 50) {
+            MP_ERR(d_audio, "initial decode failed\n");
+            uninit_decoder(d_audio);
+            return 0;
+        }
+        d_audio->ad_driver->decode_packet(d_audio);
+    }
+
     d_audio->decode_buffer = mp_audio_buffer_create(NULL);
     if (!reinit_audio_buffer(d_audio)) {
         uninit_decoder(d_audio);
@@ -241,26 +248,28 @@ static int filter_n_bytes(struct dec_audio *da, struct mp_audio_buffer *outbuf,
     mp_audio_buffer_get_format(da->decode_buffer, &config);
 
     while (mp_audio_buffer_samples(da->decode_buffer) < len) {
-        int maxlen = mp_audio_buffer_get_write_available(da->decode_buffer);
-        if (maxlen < DECODE_MAX_UNIT)
-            break;
-        struct mp_audio buffer;
-        mp_audio_buffer_get_write_buffer(da->decode_buffer, maxlen, &buffer);
-        buffer.samples = 0;
-        error = da->ad_driver->decode_audio(da, &buffer, maxlen);
-        if (error < 0)
-            break;
-        // Commit the data just read as valid data
-        mp_audio_buffer_finish_write(da->decode_buffer, buffer.samples);
         // Format change
         if (!mp_audio_config_equals(&da->decoded, &config)) {
             // If there are still samples left in the buffer, let them drain
             // first, and don't signal a format change to the caller yet.
-            if (mp_audio_buffer_samples(da->decode_buffer) > 0)
-                break;
-            error = AD_NEW_FMT;
+            if (mp_audio_buffer_samples(da->decode_buffer) == 0)
+                error = AD_NEW_FMT;
             break;
         }
+        if (da->decoded.samples > 0) {
+            int copy = MPMIN(da->decoded.samples, len);
+            struct mp_audio append = da->decoded;
+            append.samples = copy;
+            mp_audio_buffer_append(da->decode_buffer, &append);
+            mp_audio_skip_samples(&da->decoded, copy);
+            continue;
+        }
+        error = da->ad_driver->decode_packet(da);
+        if (error < 0)
+            break;
+        // No progress means the decoder is buffering input data.
+        if (!da->decoded.samples)
+            break;
     }
 
     // Filter
diff --git a/audio/decode/dec_audio.h b/audio/decode/dec_audio.h
index 25afda1390..c1b5eafb49 100644
--- a/audio/decode/dec_audio.h
+++ b/audio/decode/dec_audio.h
@@ -38,9 +38,7 @@ struct dec_audio {
     char *decoder_desc;
     struct replaygain_data *replaygain_data;
     // set by decoder
-    struct mp_audio decoded;    // format of decoded audio (no data, temporarily
-                                // different from decode_buffer during format
-                                // changes)
+    struct mp_audio decoded;    // decoded audio set by last decode_packet() call
     int bitrate;                // input bitrate, can change with VBR sources
     // last known pts value in output from decoder
     double pts;
@@ -53,6 +51,7 @@ struct dec_audio {
 enum {
     AD_OK = 0,
     AD_ERR = -1,
+    AD_EOF = -1, // same as AD_ERR for now
     AD_NEW_FMT = -2,
     AD_ASYNC_PLAY_DONE = -3,
 };