From 6c43f33ac2e7606b2013f6261144389589394196 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Tue, 17 Jan 2017 15:54:57 +0100 Subject: [PATCH] avcodec/wmaprodec: >2 channel support for XMA Signed-off-by: Paul B Mahol --- libavcodec/wmaprodec.c | 274 ++++++++++++++++++++++++++++++----------- libavformat/wavdec.c | 3 + 2 files changed, 204 insertions(+), 73 deletions(-) diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index 105e27999e..a53c64c1d3 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -207,19 +207,19 @@ typedef struct WMAProDecodeCtx { int subframe_offset; ///< subframe offset in the bit reservoir uint8_t packet_loss; ///< set in case of bitstream error uint8_t packet_done; ///< set when a packet is fully decoded - uint8_t skip_packets; /* frame decode state */ uint32_t frame_num; ///< current frame number (not used for decoding) - int num_frames; GetBitContext gb; ///< bitstream reader context int buf_bit_size; ///< buffer size in bits uint8_t drc_gain; ///< gain for the DRC tool int8_t skip_frame; ///< skip output step int8_t parsed_all_subframes; ///< all subframes decoded? + uint8_t skip_packets; /* subframe/block decode state */ int16_t subframe_len; ///< current subframe length + int8_t nb_channels; ///< number of channels in stream (XMA1/2) int8_t channels_for_cur_subframe; ///< number of channels that contain the subframe int8_t channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS]; int8_t num_bands; ///< number of scale factor bands @@ -234,6 +234,13 @@ typedef struct WMAProDecodeCtx { WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS]; ///< per channel data } WMAProDecodeCtx; +typedef struct XMADecodeCtx { + WMAProDecodeCtx xma[4]; + AVFrame *frames[4]; + int current_stream; + float samples[8][512 * 64]; + int offset[4]; +} XMADecodeCtx; /** *@brief helper function to print the most important members of the context @@ -250,7 +257,7 @@ static av_cold void dump_context(WMAProDecodeCtx *s) PRINT("log2 frame size", s->log2_frame_size); PRINT("max num subframes", s->max_num_subframes); PRINT("len prefix", s->len_prefix); - PRINT("num channels", s->avctx->channels); + PRINT("num channels", s->nb_channels); } /** @@ -258,9 +265,8 @@ static av_cold void dump_context(WMAProDecodeCtx *s) *@param avctx codec context *@return 0 on success, < 0 otherwise */ -static av_cold int decode_end(AVCodecContext *avctx) +static av_cold int decode_end(WMAProDecodeCtx *s) { - WMAProDecodeCtx *s = avctx->priv_data; int i; av_freep(&s->fdsp); @@ -271,6 +277,15 @@ static av_cold int decode_end(AVCodecContext *avctx) return 0; } +static av_cold int wmapro_decode_end(AVCodecContext *avctx) +{ + WMAProDecodeCtx *s = avctx->priv_data; + + decode_end(s); + + return 0; +} + static av_cold int get_rate(AVCodecContext *avctx) { if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { // XXX: is this really only for XMA? @@ -291,9 +306,8 @@ static av_cold int get_rate(AVCodecContext *avctx) *@param avctx codec context *@return 0 on success, -1 otherwise */ -static av_cold int decode_init(AVCodecContext *avctx) +static av_cold int decode_init(WMAProDecodeCtx *s, AVCodecContext *avctx) { - WMAProDecodeCtx *s = avctx->priv_data; uint8_t *edata_ptr = avctx->extradata; unsigned int channel_mask; int i, bits; @@ -326,7 +340,6 @@ static av_cold int decode_init(AVCodecContext *avctx) s->decode_flags = 0x10d6; channel_mask = avctx->extradata ? AV_RL32(edata_ptr+2) : 0; s->bits_per_sample = 16; - } else if (avctx->codec_id == AV_CODEC_ID_XMA1) { s->decode_flags = 0x10d6; s->bits_per_sample = 16; @@ -346,8 +359,9 @@ static av_cold int decode_init(AVCodecContext *avctx) } if (avctx->codec_id != AV_CODEC_ID_WMAPRO && avctx->channels > 2) { - avpriv_report_missing_feature(avctx, ">2 channels support"); - return AVERROR_PATCHWELCOME; + s->nb_channels = 2; + } else { + s->nb_channels = avctx->channels; } /** generic init */ @@ -406,18 +420,18 @@ static av_cold int decode_init(AVCodecContext *avctx) return AVERROR_INVALIDDATA; } - if (avctx->channels < 0) { + if (s->nb_channels <= 0) { av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n", - avctx->channels); + s->nb_channels); return AVERROR_INVALIDDATA; - } else if (avctx->channels > WMAPRO_MAX_CHANNELS) { + } else if (s->nb_channels > WMAPRO_MAX_CHANNELS) { avpriv_request_sample(avctx, "More than %d channels", WMAPRO_MAX_CHANNELS); return AVERROR_PATCHWELCOME; } /** init previous block len */ - for (i = 0; i < avctx->channels; i++) + for (i = 0; i < s->nb_channels; i++) s->channel[i].prev_block_len = s->samples_per_frame; /** extract lfe channel position */ @@ -542,6 +556,18 @@ static av_cold int decode_init(AVCodecContext *avctx) return 0; } +/** + *@brief Initialize the decoder. + *@param avctx codec context + *@return 0 on success, -1 otherwise + */ +static av_cold int wmapro_decode_init(AVCodecContext *avctx) +{ + WMAProDecodeCtx *s = avctx->priv_data; + + return decode_init(s, avctx); +} + /** *@brief Decode the subframe length. *@param s context @@ -603,7 +629,7 @@ static int decode_tilehdr(WMAProDecodeCtx *s) { uint16_t num_samples[WMAPRO_MAX_CHANNELS] = { 0 };/**< sum of samples for all currently known subframes of a channel */ uint8_t contains_subframe[WMAPRO_MAX_CHANNELS]; /**< flag indicating if a channel contains the current subframe */ - int channels_for_cur_subframe = s->avctx->channels; /**< number of channels that contain the current subframe */ + int channels_for_cur_subframe = s->nb_channels; /**< number of channels that contain the current subframe */ int fixed_channel_layout = 0; /**< flag indicating that all channels use the same subframe offsets and sizes */ int min_channel_len = 0; /**< smallest sum of samples (channels with this length will be processed first) */ int c; @@ -615,7 +641,7 @@ static int decode_tilehdr(WMAProDecodeCtx *s) */ /** reset tiling information */ - for (c = 0; c < s->avctx->channels; c++) + for (c = 0; c < s->nb_channels; c++) s->channel[c].num_subframes = 0; if (s->max_num_subframes == 1 || get_bits1(&s->gb)) @@ -626,7 +652,7 @@ static int decode_tilehdr(WMAProDecodeCtx *s) int subframe_len; /** check which channels contain the subframe */ - for (c = 0; c < s->avctx->channels; c++) { + for (c = 0; c < s->nb_channels; c++) { if (num_samples[c] == min_channel_len) { if (fixed_channel_layout || channels_for_cur_subframe == 1 || (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) @@ -643,7 +669,7 @@ static int decode_tilehdr(WMAProDecodeCtx *s) /** add subframes to the individual channels and find new min_channel_len */ min_channel_len += subframe_len; - for (c = 0; c < s->avctx->channels; c++) { + for (c = 0; c < s->nb_channels; c++) { WMAProChannelCtx* chan = &s->channel[c]; if (contains_subframe[c]) { @@ -670,7 +696,7 @@ static int decode_tilehdr(WMAProDecodeCtx *s) } } while (min_channel_len < s->samples_per_frame); - for (c = 0; c < s->avctx->channels; c++) { + for (c = 0; c < s->nb_channels; c++) { int i; int offset = 0; for (i = 0; i < s->channel[c].num_subframes; i++) { @@ -696,8 +722,8 @@ static void decode_decorrelation_matrix(WMAProDecodeCtx *s, int i; int offset = 0; int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS]; - memset(chgroup->decorrelation_matrix, 0, s->avctx->channels * - s->avctx->channels * sizeof(*chgroup->decorrelation_matrix)); + memset(chgroup->decorrelation_matrix, 0, s->nb_channels * + s->nb_channels * sizeof(*chgroup->decorrelation_matrix)); for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++) rotation_offset[i] = get_bits(&s->gb, 6); @@ -750,7 +776,7 @@ static int decode_channel_transform(WMAProDecodeCtx* s) /** in the one channel case channel transforms are pointless */ s->num_chgroups = 0; - if (s->avctx->channels > 1) { + if (s->nb_channels > 1) { int remaining_channels = s->channels_for_cur_subframe; if (get_bits1(&s->gb)) { @@ -797,7 +823,7 @@ static int decode_channel_transform(WMAProDecodeCtx* s) } } else { chgroup->transform = 1; - if (s->avctx->channels == 2) { + if (s->nb_channels == 2) { chgroup->decorrelation_matrix[0] = 1.0; chgroup->decorrelation_matrix[1] = -1.0; chgroup->decorrelation_matrix[2] = 1.0; @@ -1087,7 +1113,7 @@ static void inverse_channel_transform(WMAProDecodeCtx *s) (*ch)[y] = sum; } } - } else if (s->avctx->channels == 2) { + } else if (s->nb_channels == 2) { int len = FFMIN(sfb[1], s->subframe_len) - sfb[0]; s->fdsp->vector_fmul_scalar(ch_data[0] + sfb[0], ch_data[0] + sfb[0], @@ -1140,7 +1166,7 @@ static int decode_subframe(WMAProDecodeCtx *s) int offset = s->samples_per_frame; int subframe_len = s->samples_per_frame; int i; - int total_samples = s->samples_per_frame * s->avctx->channels; + int total_samples = s->samples_per_frame * s->nb_channels; int transmit_coeffs = 0; int cur_subwoofer_cutoff; @@ -1150,7 +1176,7 @@ static int decode_subframe(WMAProDecodeCtx *s) == the next block of the channel with the smallest number of decoded samples */ - for (i = 0; i < s->avctx->channels; i++) { + for (i = 0; i < s->nb_channels; i++) { s->channel[i].grouped = 0; if (offset > s->channel[i].decoded_samples) { offset = s->channel[i].decoded_samples; @@ -1164,7 +1190,7 @@ static int decode_subframe(WMAProDecodeCtx *s) /** get a list of all channels that contain the estimated block */ s->channels_for_cur_subframe = 0; - for (i = 0; i < s->avctx->channels; i++) { + for (i = 0; i < s->nb_channels; i++) { const int cur_subframe = s->channel[i].cur_subframe; /** subtract already processed samples */ total_samples -= s->channel[i].decoded_samples; @@ -1377,11 +1403,10 @@ static int decode_subframe(WMAProDecodeCtx *s) */ static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr) { - AVCodecContext *avctx = s->avctx; GetBitContext* gb = &s->gb; int more_frames = 0; int len = 0; - int i, ret; + int i; /** get frame length */ if (s->len_prefix) @@ -1396,9 +1421,9 @@ static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr) } /** read postproc transform */ - if (s->avctx->channels > 1 && get_bits1(gb)) { + if (s->nb_channels > 1 && get_bits1(gb)) { if (get_bits1(gb)) { - for (i = 0; i < avctx->channels * avctx->channels; i++) + for (i = 0; i < s->nb_channels * s->nb_channels; i++) skip_bits(gb, 4); } } @@ -1433,7 +1458,7 @@ static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr) /** reset subframe states */ s->parsed_all_subframes = 0; - for (i = 0; i < avctx->channels; i++) { + for (i = 0; i < s->nb_channels; i++) { s->channel[i].decoded_samples = 0; s->channel[i].cur_subframe = 0; s->channel[i].reuse_sf = 0; @@ -1447,19 +1472,12 @@ static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr) } } - /* get output buffer */ - frame->nb_samples = s->samples_per_frame; - if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { - s->packet_loss = 1; - return 0; - } - /** copy samples to the output buffer */ - for (i = 0; i < avctx->channels; i++) + for (i = 0; i < s->nb_channels; i++) memcpy(frame->extended_data[i], s->channel[i].out, s->samples_per_frame * sizeof(*s->channel[i].out)); - for (i = 0; i < avctx->channels; i++) { + for (i = 0; i < s->nb_channels; i++) { /** reuse second half of the IMDCT output for the next frame */ memcpy(&s->channel[i].out[0], &s->channel[i].out[s->samples_per_frame], @@ -1564,17 +1582,9 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len, skip_bits(&s->gb, s->frame_offset); } -/** - *@brief Decode a single WMA packet. - *@param avctx codec context - *@param data the output buffer - *@param avpkt input packet - *@return number of bytes that were read from the input buffer - */ -static int decode_packet(AVCodecContext *avctx, void *data, - int *got_frame_ptr, AVPacket* avpkt) +static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s, + void *data, int *got_frame_ptr, AVPacket *avpkt) { - WMAProDecodeCtx *s = avctx->priv_data; GetBitContext* gb = &s->pgb; const uint8_t* buf = avpkt->data; int buf_size = avpkt->size; @@ -1583,11 +1593,6 @@ static int decode_packet(AVCodecContext *avctx, void *data, *got_frame_ptr = 0; - if (s->skip_packets > 0) { - s->skip_packets--; - return FFMIN(avpkt->size, avctx->block_align); - } - if (s->packet_done || s->packet_loss) { s->packet_done = 0; @@ -1613,7 +1618,8 @@ static int decode_packet(AVCodecContext *avctx, void *data, packet_sequence_number = get_bits(gb, 4); skip_bits(gb, 2); } else { - s->num_frames = get_bits(gb, 6); + int num_frames = get_bits(gb, 6); + ff_dlog(avctx, "packet[%d]: number of frames %d\n", avctx->frame_number, num_frames); packet_sequence_number = 0; } @@ -1622,6 +1628,7 @@ static int decode_packet(AVCodecContext *avctx, void *data, if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { skip_bits(gb, 3); s->skip_packets = get_bits(gb, 8); + ff_dlog(avctx, "packet[%d]: skip packets %d\n", avctx->frame_number, s->skip_packets); } ff_dlog(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number, @@ -1665,7 +1672,6 @@ static int decode_packet(AVCodecContext *avctx, void *data, s->num_saved_bits = 0; s->packet_loss = 0; } - } else { int frame_size; s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3; @@ -1687,8 +1693,9 @@ static int decode_packet(AVCodecContext *avctx, void *data, the "previous frame" data from the next packet so that we get a buffer that only contains full frames */ s->packet_done = !decode_frame(s, data, got_frame_ptr); - } else + } else { s->packet_done = 1; + } } if (remaining_bits(s, gb) < 0) { @@ -1710,6 +1717,129 @@ static int decode_packet(AVCodecContext *avctx, void *data, return get_bits_count(gb) >> 3; } +/** + *@brief Decode a single WMA packet. + *@param avctx codec context + *@param data the output buffer + *@param avpkt input packet + *@return number of bytes that were read from the input buffer + */ +static int wmapro_decode_packet(AVCodecContext *avctx, void *data, + int *got_frame_ptr, AVPacket *avpkt) +{ + WMAProDecodeCtx *s = avctx->priv_data; + AVFrame *frame = data; + int ret; + + /* get output buffer */ + frame->nb_samples = s->samples_per_frame; + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { + s->packet_loss = 1; + return 0; + } + + return decode_packet(avctx, s, data, got_frame_ptr, avpkt); +} + +static int xma_decode_packet(AVCodecContext *avctx, void *data, + int *got_frame_ptr, AVPacket *avpkt) +{ + XMADecodeCtx *s = avctx->priv_data; + int got_stream_frame_ptr = 0; + AVFrame *frame = data; + int i, ret, offset = INT_MAX; + + ret = decode_packet(avctx, &s->xma[s->current_stream], s->frames[s->current_stream], + &got_stream_frame_ptr, avpkt); + + if (got_stream_frame_ptr) { + memcpy(&s->samples[s->current_stream * 2 + 0][s->offset[s->current_stream] * 512], + s->frames[s->current_stream]->extended_data[0], 512 * 4); + memcpy(&s->samples[s->current_stream * 2 + 1][s->offset[s->current_stream] * 512], + s->frames[s->current_stream]->extended_data[1], 512 * 4); + s->offset[s->current_stream]++; + } + + if (s->xma[s->current_stream].packet_done || + s->xma[s->current_stream].packet_loss) { + int bret; + + if (s->xma[0].skip_packets == 0) { + s->current_stream = 0; + } else if (s->xma[1].skip_packets == 0) { + s->current_stream = 1; + } else if (s->xma[2].skip_packets == 0) { + s->current_stream = 2; + } else if (s->xma[3].skip_packets == 0) { + s->current_stream = 3; + } else { + s->current_stream++; + if (s->current_stream >= avctx->channels / 2) + s->current_stream = 0; + } + for (i = 0; i < avctx->channels / 2; i++) { + s->xma[i].skip_packets = FFMAX(0, s->xma[i].skip_packets - 1); + } + + for (i = 0; i < avctx->channels / 2; i++) { + offset = FFMIN(offset, s->offset[i]); + } + + if (offset > 0) { + frame->nb_samples = 512 * offset; + if ((bret = ff_get_buffer(avctx, frame, 0)) < 0) + return bret; + + for (i = 0; i < avctx->channels / 2; i++) { + memcpy(frame->extended_data[i * 2 + 0], s->samples[i * 2 + 0], frame->nb_samples * 4); + memcpy(frame->extended_data[i * 2 + 1], s->samples[i * 2 + 1], frame->nb_samples * 4); + s->offset[i] -= offset; + if (s->offset[i]) { + memmove(s->samples[i * 2 + 0], s->samples[i * 2 + 0] + frame->nb_samples, s->offset[i] * 4 * 512); + memmove(s->samples[i * 2 + 1], s->samples[i * 2 + 1] + frame->nb_samples, s->offset[i] * 4 * 512); + } + } + + *got_frame_ptr = 1; + } + } + + return ret; +} + +static av_cold int xma_decode_init(AVCodecContext *avctx) +{ + XMADecodeCtx *s = avctx->priv_data; + int i, ret; + + for (i = 0; i < avctx->channels / 2; i++) { + ret = decode_init(&s->xma[i], avctx); + s->frames[i] = av_frame_alloc(); + if (!s->frames[i]) + return AVERROR(ENOMEM); + s->frames[i]->nb_samples = 512; + if ((ret = ff_get_buffer(avctx, s->frames[i], 0)) < 0) { + return AVERROR(ENOMEM); + } + + } + + return ret; +} + +static av_cold int xma_decode_end(AVCodecContext *avctx) +{ + XMADecodeCtx *s = avctx->priv_data; + int i; + + for (i = 0; i < avctx->channels / 2; i++) { + decode_end(&s->xma[i]); + av_frame_free(&s->frames[i]); + } + + return 0; +} + /** *@brief Clear decoder buffers (for seeking). *@param avctx codec context @@ -1720,7 +1850,7 @@ static void flush(AVCodecContext *avctx) int i; /** reset output buffer as a part of it is used during the windowing of a new frame */ - for (i = 0; i < avctx->channels; i++) + for (i = 0; i < s->nb_channels; i++) memset(s->channel[i].out, 0, s->samples_per_frame * sizeof(*s->channel[i].out)); s->packet_loss = 1; @@ -1736,9 +1866,9 @@ AVCodec ff_wmapro_decoder = { .type = AVMEDIA_TYPE_AUDIO, .id = AV_CODEC_ID_WMAPRO, .priv_data_size = sizeof(WMAProDecodeCtx), - .init = decode_init, - .close = decode_end, - .decode = decode_packet, + .init = wmapro_decode_init, + .close = wmapro_decode_end, + .decode = wmapro_decode_packet, .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1, .flush = flush, .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, @@ -1750,12 +1880,11 @@ AVCodec ff_xma1_decoder = { .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"), .type = AVMEDIA_TYPE_AUDIO, .id = AV_CODEC_ID_XMA1, - .priv_data_size = sizeof(WMAProDecodeCtx), - .init = decode_init, - .close = decode_end, - .decode = decode_packet, + .priv_data_size = sizeof(XMADecodeCtx), + .init = xma_decode_init, + .close = xma_decode_end, + .decode = xma_decode_packet, .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1, - .flush = flush, .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE }, }; @@ -1765,12 +1894,11 @@ AVCodec ff_xma2_decoder = { .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"), .type = AVMEDIA_TYPE_AUDIO, .id = AV_CODEC_ID_XMA2, - .priv_data_size = sizeof(WMAProDecodeCtx), - .init = decode_init, - .close = decode_end, - .decode = decode_packet, + .priv_data_size = sizeof(XMADecodeCtx), + .init = xma_decode_init, + .close = xma_decode_end, + .decode = xma_decode_packet, .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1, - .flush = flush, .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE }, }; diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c index 987155e39e..4046809cf6 100644 --- a/libavformat/wavdec.c +++ b/libavformat/wavdec.c @@ -567,6 +567,9 @@ break_loop: st->codecpar->block_align == st->codecpar->channels * 4 && st->codecpar->bits_per_coded_sample == 24) { st->codecpar->codec_id = AV_CODEC_ID_PCM_F24LE; + } else if (st->codecpar->codec_id == AV_CODEC_ID_XMA1 || + st->codecpar->codec_id == AV_CODEC_ID_XMA2) { + st->codecpar->block_align = 2048; } ff_metadata_conv_ctx(s, NULL, wav_metadata_conv);