lavc: use a separate field for exporting audio encoder padding

Currently, the amount of padding inserted at the beginning by some audio
encoders is exported through AVCodecContext.delay. However:
- the term 'delay' is heavily overloaded and can have multiple different
  meanings even in the case of audio encoding.
- this field has entirely different meanings, depending on whether the
  codec context is used for encoding or decoding (and has yet another
  different meaning for video), preventing generic handling of the codec
  context.

Therefore, add a new field -- AVCodecContext.initial_padding. It could
conceivably be used for decoding as well at a later point.
This commit is contained in:
Anton Khirnov 2014-08-23 12:40:50 +00:00
parent c80a816142
commit 2df0c32ea1
22 changed files with 63 additions and 40 deletions

View File

@ -13,6 +13,10 @@ libavutil: 2014-08-09
API changes, most recent first:
2014-10-13 - xxxxxxx - lavc 56.03.0 - avcodec.h
Add AVCodecContext.initial_padding. Deprecate the use of AVCodecContext.delay
for audio encoding.
2014-09-xx - xxxxxxx - lavu 54.04.0 - pixdesc.h
Add API to return the name of frame and context color properties.

View File

@ -777,7 +777,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
for (i = 0; i < 428; i++)
ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
avctx->delay = 1024;
avctx->initial_padding = 1024;
ff_af_queue_init(avctx, &s->afq);
return 0;

View File

@ -2436,7 +2436,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
return ret;
avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
avctx->delay = AC3_BLOCK_SIZE;
avctx->initial_padding = AC3_BLOCK_SIZE;
s->bitstream_mode = avctx->audio_service_type;
if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)

View File

@ -29,8 +29,8 @@ av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
{
afq->avctx = avctx;
afq->next_pts = AV_NOPTS_VALUE;
afq->remaining_delay = avctx->delay;
afq->remaining_samples = avctx->delay;
afq->remaining_delay = avctx->initial_padding;
afq->remaining_samples = avctx->initial_padding;
afq->frame_queue = NULL;
}

View File

@ -1191,16 +1191,7 @@ typedef struct AVCodecContext {
* encoded input.
*
* Audio:
* For encoding, this is the number of "priming" samples added by the
* encoder to the beginning of the stream. The decoded output will be
* delayed by this many samples relative to the input to the encoder (or
* more, if the decoder adds its own padding).
* The timestamps on the output packets are adjusted by the encoder so
* that they always refer to the first sample of the data actually
* contained in the packet, including any added padding.
* E.g. if the timebase is 1/samplerate and the timestamp of the first
* input sample is 0, the timestamp of the first output packet will be
* -delay.
* For encoding, this field is unused (see initial_padding).
*
* For decoding, this is the number of samples the decoder needs to
* output before the decoder's output is valid. When seeking, you should
@ -2780,6 +2771,23 @@ typedef struct AVCodecContext {
* use AVOptions to set this field.
*/
int side_data_only_packets;
/**
* Audio only. The number of "priming" samples (padding) inserted by the
* encoder at the beginning of the audio. I.e. this number of leading
* decoded samples must be discarded by the caller to get the original audio
* without leading padding.
*
* - decoding: unused
* - encoding: Set by libavcodec. The timestamps on the output packets are
* adjusted by the encoder so that they always refer to the
* first sample of the data actually contained in the packet,
* including any added padding. E.g. if the timebase is
* 1/samplerate and the timestamp of the first input sample is
* 0, the timestamp of the first output packet will be
* -initial_padding.
*/
int initial_padding;
} AVCodecContext;
/**

View File

@ -106,7 +106,7 @@ static av_cold int g722_encode_init(AVCodecContext * avctx)
a common packet size for VoIP applications */
avctx->frame_size = 320;
}
avctx->delay = 22;
avctx->initial_padding = 22;
if (avctx->trellis) {
/* validate trellis */
@ -375,7 +375,7 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
}
if (frame->pts != AV_NOPTS_VALUE)
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
*got_packet_ptr = 1;
return 0;
}

View File

@ -157,7 +157,7 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
goto error;
}
avctx->delay = FAAC_DELAY_SAMPLES;
avctx->initial_padding = FAAC_DELAY_SAMPLES;
ff_af_queue_init(avctx, &s->afq);
return 0;

View File

@ -286,7 +286,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
}
avctx->frame_size = info.frameLength;
avctx->delay = info.encoderDelay;
avctx->initial_padding = info.encoderDelay;
ff_af_queue_init(avctx, &s->afq);
if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {

View File

@ -137,7 +137,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
}
/* get encoder delay */
avctx->delay = lame_get_encoder_delay(s->gfp) + 528 + 1;
avctx->initial_padding = lame_get_encoder_delay(s->gfp) + 528 + 1;
ff_af_queue_init(avctx, &s->afq);
avctx->frame_size = lame_get_framesize(s->gfp);

View File

@ -200,7 +200,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
}
avctx->frame_size = 160;
avctx->delay = 50;
avctx->initial_padding = 50;
ff_af_queue_init(avctx, &s->afq);
s->enc_state = Encoder_Interface_init(s->enc_dtx);
@ -250,7 +250,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
return AVERROR(ENOMEM);
memcpy(flush_buf, samples, frame->nb_samples * sizeof(*flush_buf));
samples = flush_buf;
if (frame->nb_samples < avctx->frame_size - avctx->delay)
if (frame->nb_samples < avctx->frame_size - avctx->initial_padding)
s->enc_last_frame = -1;
}
if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) {

View File

@ -87,7 +87,7 @@ static void libopus_write_header(AVCodecContext *avctx, int stream_count,
bytestream_put_buffer(&p, "OpusHead", 8);
bytestream_put_byte(&p, 1); /* Version */
bytestream_put_byte(&p, channels);
bytestream_put_le16(&p, avctx->delay); /* Lookahead samples at 48kHz */
bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
@ -277,7 +277,7 @@ static int av_cold libopus_encode_init(AVCodecContext *avctx)
goto fail;
}
ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->delay));
ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
if (ret != OPUS_OK)
av_log(avctx, AV_LOG_WARNING,
"Unable to get number of lookahead samples: %s\n",

View File

@ -235,7 +235,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
s->header.frames_per_packet = s->frames_per_packet;
/* set encoding delay */
speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->initial_padding);
ff_af_queue_init(avctx, &s->afq);
/* create header packet bytes from header struct */

View File

@ -60,7 +60,7 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
int ret;
avctx->frame_size = TWOLAME_SAMPLES_PER_FRAME;
avctx->delay = 512 - 32 + 1;
avctx->initial_padding = 512 - 32 + 1;
s->glopts = twolame_init();
if (!s->glopts)
@ -151,7 +151,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
if (frame) {
if (frame->pts != AV_NOPTS_VALUE)
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
} else {
avpkt->pts = s->next_pts;
}

View File

@ -61,7 +61,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
int index, ret;
avctx->frame_size = FRAME_SIZE;
avctx->delay = ENC_DELAY;
avctx->initial_padding = ENC_DELAY;
s->last_frame = 2;
ff_af_queue_init(avctx, &s->afq);

View File

@ -93,7 +93,7 @@ static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
s->last_bitrate = avctx->bit_rate;
avctx->frame_size = 320;
avctx->delay = 80;
avctx->initial_padding = 80;
s->state = E_IF_init();
@ -131,7 +131,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
}
if (frame->pts != AV_NOPTS_VALUE)
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
avpkt->size = size;
*got_packet_ptr = 1;

View File

@ -322,8 +322,8 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
if (duration > 0) {
/* we do not know encoder delay until we get the first packet from
* libvorbis, so we have to update the AudioFrameQueue counts */
if (!avctx->delay) {
avctx->delay = duration;
if (!avctx->initial_padding) {
avctx->initial_padding = duration;
s->afq.remaining_delay += duration;
s->afq.remaining_samples += duration;
}

View File

@ -84,7 +84,7 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
bitrate = bitrate / 1000;
s->nb_channels = channels;
avctx->frame_size = MPA_FRAME_SIZE;
avctx->delay = 512 - 32 + 1;
avctx->initial_padding = 512 - 32 + 1;
/* encoding freq */
s->lsf = 0;
@ -735,7 +735,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
encode_frame(s, bit_alloc, padding);
if (frame->pts != AV_NOPTS_VALUE)
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
avpkt->size = put_bits_count(&s->pb) / 8;
*got_packet_ptr = 1;

View File

@ -165,7 +165,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
}
avctx->frame_size = NELLY_SAMPLES;
avctx->delay = NELLY_BUF_LEN;
avctx->initial_padding = NELLY_BUF_LEN;
ff_af_queue_init(avctx, &s->afq);
s->avctx = avctx;
if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)

View File

@ -56,7 +56,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
return -1;
}
avctx->frame_size = NBLOCKS * BLOCKSIZE;
avctx->delay = avctx->frame_size;
avctx->initial_padding = avctx->frame_size;
avctx->bit_rate = 8000;
ractx = avctx->priv_data;
ractx->lpc_coef[0] = ractx->lpc_tables[0];

View File

@ -1240,6 +1240,11 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
}
}
#if FF_API_AUDIOENC_DELAY
if (av_codec_is_encoder(avctx->codec))
avctx->delay = avctx->initial_padding;
#endif
if (av_codec_is_decoder(avctx->codec)) {
/* validate channel layout from the decoder */
if (avctx->channel_layout) {
@ -1447,6 +1452,10 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
end:
av_frame_free(&padded_frame);
#if FF_API_AUDIOENC_DELAY
avctx->delay = avctx->initial_padding;
#endif
return ret;
}

View File

@ -29,8 +29,8 @@
#include "libavutil/version.h"
#define LIBAVCODEC_VERSION_MAJOR 56
#define LIBAVCODEC_VERSION_MINOR 2
#define LIBAVCODEC_VERSION_MICRO 2
#define LIBAVCODEC_VERSION_MINOR 3
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
@ -153,5 +153,8 @@
#ifndef FF_API_AFD
#define FF_API_AFD (LIBAVCODEC_VERSION_MAJOR < 57)
#endif
#ifndef FF_API_AUDIOENC_DELAY
#define FF_API_AUDIOENC_DELAY (LIBAVCODEC_VERSION_MAJOR < 58)
#endif
#endif /* AVCODEC_VERSION_H */

View File

@ -92,8 +92,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
avctx->block_align = block_align;
avctx->bit_rate = avctx->block_align * 8LL * avctx->sample_rate /
s->frame_len;
avctx->frame_size =
avctx->delay = s->frame_len;
avctx->frame_size = avctx->initial_padding = s->frame_len;
return 0;
}
@ -420,7 +419,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
flush_put_bits(&s->pb);
if (frame->pts != AV_NOPTS_VALUE)
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
avpkt->size = avctx->block_align;
*got_packet_ptr = 1;