twinvq: support multiple frames per packet

Signed-off-by: Diego Biurrun <diego@biurrun.de>
2013-10-20 16:15:03 +02:00 · 2013-10-20 16:15:03 +02:00 · 1afa8a7568
parent ddb839e928
commit 1afa8a7568
5 changed files with 125 additions and 63 deletions
--- a/libavcodec/metasound.c
+++ b/libavcodec/metasound.c
@@ -163,7 +163,7 @@ static void read_cb_data(TwinVQContext *tctx, GetBitContext *gb,
 static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
                                    const uint8_t *buf, int buf_size)
 {
-    TwinVQFrameData     *bits = &tctx->bits;
+    TwinVQFrameData     *bits;
    const TwinVQModeTab *mtab = tctx->mtab;
    int channels              = tctx->avctx->channels;
    int sub;
@@ -172,58 +172,67 @@ static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,

    init_get_bits(&gb, buf, buf_size * 8);

-    bits->window_type = get_bits(&gb, TWINVQ_WINDOW_TYPE_BITS);
+    for (tctx->cur_frame = 0; tctx->cur_frame < tctx->frames_per_packet;
+         tctx->cur_frame++) {
+        bits = tctx->bits + tctx->cur_frame;

-    if (bits->window_type > 8) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
-        return AVERROR_INVALIDDATA;
-    }
+        bits->window_type = get_bits(&gb, TWINVQ_WINDOW_TYPE_BITS);

-    bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits.window_type];
+        if (bits->window_type > 8) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
+            return AVERROR_INVALIDDATA;
+        }

-    sub = mtab->fmode[bits->ftype].sub;
+        bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits[tctx->cur_frame].window_type];

-    if (bits->ftype != TWINVQ_FT_SHORT)
-        get_bits(&gb, 2);
+        sub = mtab->fmode[bits->ftype].sub;

-    read_cb_data(tctx, &gb, bits->main_coeffs, bits->ftype);
+        if (bits->ftype != TWINVQ_FT_SHORT)
+            get_bits(&gb, 2);

-    for (i = 0; i < channels; i++)
-        for (j = 0; j < sub; j++)
-            for (k = 0; k < mtab->fmode[bits->ftype].bark_n_coef; k++)
-                bits->bark1[i][j][k] =
-                    get_bits(&gb, mtab->fmode[bits->ftype].bark_n_bit);
+        read_cb_data(tctx, &gb, bits->main_coeffs, bits->ftype);

-    for (i = 0; i < channels; i++)
-        for (j = 0; j < sub; j++)
-            bits->bark_use_hist[i][j] = get_bits1(&gb);
-
-    if (bits->ftype == TWINVQ_FT_LONG) {
        for (i = 0; i < channels; i++)
-            bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
-    } else {
-        for (i = 0; i < channels; i++) {
-            bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
            for (j = 0; j < sub; j++)
-                bits->sub_gain_bits[i * sub + j] =
-                    get_bits(&gb, TWINVQ_SUB_GAIN_BITS);
+                for (k = 0; k < mtab->fmode[bits->ftype].bark_n_coef; k++)
+                    bits->bark1[i][j][k] =
+                        get_bits(&gb, mtab->fmode[bits->ftype].bark_n_bit);
+
+        for (i = 0; i < channels; i++)
+            for (j = 0; j < sub; j++)
+                bits->bark_use_hist[i][j] = get_bits1(&gb);
+
+        if (bits->ftype == TWINVQ_FT_LONG) {
+            for (i = 0; i < channels; i++)
+                bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
+        } else {
+            for (i = 0; i < channels; i++) {
+                bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
+                for (j = 0; j < sub; j++)
+                    bits->sub_gain_bits[i * sub + j] =
+                        get_bits(&gb, TWINVQ_SUB_GAIN_BITS);
+            }
        }
-    }

-    for (i = 0; i < channels; i++) {
-        bits->lpc_hist_idx[i] = get_bits(&gb, mtab->lsp_bit0);
-        bits->lpc_idx1[i]     = get_bits(&gb, mtab->lsp_bit1);
-
-        for (j = 0; j < mtab->lsp_split; j++)
-            bits->lpc_idx2[i][j] = get_bits(&gb, mtab->lsp_bit2);
-    }
-
-    if (bits->ftype == TWINVQ_FT_LONG) {
-        read_cb_data(tctx, &gb, bits->ppc_coeffs, 3);
        for (i = 0; i < channels; i++) {
-            bits->p_coef[i] = get_bits(&gb, mtab->ppc_period_bit);
-            bits->g_coef[i] = get_bits(&gb, mtab->pgain_bit);
+            bits->lpc_hist_idx[i] = get_bits(&gb, mtab->lsp_bit0);
+            bits->lpc_idx1[i]     = get_bits(&gb, mtab->lsp_bit1);
+
+            for (j = 0; j < mtab->lsp_split; j++)
+                bits->lpc_idx2[i][j] = get_bits(&gb, mtab->lsp_bit2);
        }
+
+        if (bits->ftype == TWINVQ_FT_LONG) {
+            read_cb_data(tctx, &gb, bits->ppc_coeffs, 3);
+            for (i = 0; i < channels; i++) {
+                bits->p_coef[i] = get_bits(&gb, mtab->ppc_period_bit);
+                bits->g_coef[i] = get_bits(&gb, mtab->pgain_bit);
+            }
+        }
+
+        // subframes are aligned to nibbles
+        if (get_bits_count(&gb) & 3)
+            skip_bits(&gb, 4 - (get_bits_count(&gb) & 3));
    }

    return 0;
@@ -316,6 +325,12 @@ static av_cold int metasound_decode_init(AVCodecContext *avctx)
    case (2 << 16) + (16 << 8) + 16:
        tctx->mtab = &ff_metasound_mode1616s;
        break;
+    case (1 << 16) + (22 << 8) + 24:
+        tctx->mtab = &ff_metasound_mode2224;
+        break;
+    case (2 << 16) + (22 << 8) + 24:
+        tctx->mtab = &ff_metasound_mode2224s;
+        break;
    case (1 << 16) + (44 << 8) + 32:
        tctx->mtab = &ff_metasound_mode4432;
        break;
@@ -341,13 +356,12 @@ static av_cold int metasound_decode_init(AVCodecContext *avctx)
        return AVERROR(ENOSYS);
    }

-    avctx->block_align = (avctx->bit_rate * tctx->mtab->size
-                                          / avctx->sample_rate + 7) / 8;
-
    tctx->codec          = TWINVQ_CODEC_METASOUND;
    tctx->read_bitstream = metasound_read_bitstream;
    tctx->dec_bark_env   = dec_bark_env;
    tctx->decode_ppc     = decode_ppc;
+    tctx->frame_size     = avctx->bit_rate * tctx->mtab->size
+                                           / avctx->sample_rate;

    return ff_twinvq_decode_init(avctx);
 }
--- a/libavcodec/metasound_data.c
+++ b/libavcodec/metasound_data.c
@@ -15244,6 +15244,24 @@ const TwinVQModeTab ff_metasound_mode1616s = {
    1024, 16, lsp16, 1, 6, 4, 3, shape16s, 9, 56, 60, 7, 200
 };

+const TwinVQModeTab ff_metasound_mode2224 = {
+    {
+        { 8, bark_tab_s22_128,  10, fcb22s, 1, 6, cb2224s0, cb2224s1, 15 },
+        { 2, bark_tab_m22_512,  20, fcb22m, 2, 6, cb2224m0, cb2224m1, 14 },
+        { 1, bark_tab_l22_1024, 32, fcb22l, 4, 6, cb2224l0, cb2224l1, 15 }
+    },
+    1024, 16, lsp22, 1, 6, 4, 3, shape22, 9, 56, 36, 7, 200
+};
+
+const TwinVQModeTab ff_metasound_mode2224s = {
+    {
+        { 8, bark_tab_s22s_128,  10, fcb22ss, 1, 6, cb2224ss0, cb2224ss1, 15 },
+        { 2, bark_tab_m22s_512,  20, fcb22sm, 2, 6, cb2224sm0, cb2224sm1, 14 },
+        { 1, bark_tab_l22s_1024, 32, fcb22sl, 4, 6, cb2224sl0, cb2224sl1, 15 }
+    },
+    1024, 16, lsp22s, 1, 6, 4, 3, shape22s, 9, 56, 36, 7, 200
+};
+
 const TwinVQModeTab ff_metasound_mode4432 = {
    {
        { 16, bark_tab_s44_128,  10, fcb44ss, 1, 6, cb4432s0, cb4432s1, 23 },
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -215,7 +215,7 @@ static void dec_gain(TwinVQContext *tctx,
                     enum TwinVQFrameType ftype, float *out)
 {
    const TwinVQModeTab   *mtab =  tctx->mtab;
-    const TwinVQFrameData *bits = &tctx->bits;
+    const TwinVQFrameData *bits = &tctx->bits[tctx->cur_frame];
    int i, j;
    int sub        = mtab->fmode[ftype].sub;
    float step     = TWINVQ_AMP_MAX     / ((1 << TWINVQ_GAIN_BITS)     - 1);
@@ -376,11 +376,12 @@ static void imdct_and_window(TwinVQContext *tctx, enum TwinVQFrameType ftype,
 }

 static void imdct_output(TwinVQContext *tctx, enum TwinVQFrameType ftype,
-                         int wtype, float **out)
+                         int wtype, float **out, int offset)
 {
    const TwinVQModeTab *mtab = tctx->mtab;
    float *prev_buf           = tctx->prev_frame + tctx->last_block_pos[0];
    int size1, size2, i;
+    float *out1, *out2;

    for (i = 0; i < tctx->avctx->channels; i++)
        imdct_and_window(tctx, ftype, wtype,
@@ -394,15 +395,17 @@ static void imdct_output(TwinVQContext *tctx, enum TwinVQFrameType ftype,
    size2 = tctx->last_block_pos[0];
    size1 = mtab->size - size2;

-    memcpy(&out[0][0],     prev_buf,         size1 * sizeof(out[0][0]));
-    memcpy(&out[0][size1], tctx->curr_frame, size2 * sizeof(out[0][0]));
+    out1 = &out[0][0] + offset;
+    memcpy(out1,         prev_buf,         size1 * sizeof(*out1));
+    memcpy(out1 + size1, tctx->curr_frame, size2 * sizeof(*out1));

    if (tctx->avctx->channels == 2) {
-        memcpy(&out[1][0], &prev_buf[2 * mtab->size],
-               size1 * sizeof(out[1][0]));
-        memcpy(&out[1][size1], &tctx->curr_frame[2 * mtab->size],
-               size2 * sizeof(out[1][0]));
-        tctx->fdsp.butterflies_float(out[0], out[1], mtab->size);
+        out2 = &out[1][0] + offset;
+        memcpy(out2, &prev_buf[2 * mtab->size],
+               size1 * sizeof(*out2));
+        memcpy(out2 + size1, &tctx->curr_frame[2 * mtab->size],
+               size2 * sizeof(*out2));
+        tctx->fdsp.butterflies_float(out1, out2, mtab->size);
    }
 }

@@ -410,7 +413,7 @@ static void read_and_decode_spectrum(TwinVQContext *tctx, float *out,
                                     enum TwinVQFrameType ftype)
 {
    const TwinVQModeTab *mtab = tctx->mtab;
-    TwinVQFrameData *bits     = &tctx->bits;
+    TwinVQFrameData *bits     = &tctx->bits[tctx->cur_frame];
    int channels              = tctx->avctx->channels;
    int sub        = mtab->fmode[ftype].sub;
    int block_size = mtab->size / sub;
@@ -483,7 +486,7 @@ int ff_twinvq_decode_frame(AVCodecContext *avctx, void *data,

    /* get output buffer */
    if (tctx->discarded_packets >= 2) {
-        frame->nb_samples = mtab->size;
+        frame->nb_samples = mtab->size * tctx->frames_per_packet;
        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
            return ret;
@@ -500,11 +503,17 @@ int ff_twinvq_decode_frame(AVCodecContext *avctx, void *data,
    if ((ret = tctx->read_bitstream(avctx, tctx, buf, buf_size)) < 0)
        return ret;

-    read_and_decode_spectrum(tctx, tctx->spectrum, tctx->bits.ftype);
+    for (tctx->cur_frame = 0; tctx->cur_frame < tctx->frames_per_packet;
+         tctx->cur_frame++) {
+        read_and_decode_spectrum(tctx, tctx->spectrum,
+                                 tctx->bits[tctx->cur_frame].ftype);

-    imdct_output(tctx, tctx->bits.ftype, tctx->bits.window_type, out);
+        imdct_output(tctx, tctx->bits[tctx->cur_frame].ftype,
+                     tctx->bits[tctx->cur_frame].window_type, out,
+                     tctx->cur_frame * mtab->size);

-    FFSWAP(float *, tctx->curr_frame, tctx->prev_frame);
+        FFSWAP(float *, tctx->curr_frame, tctx->prev_frame);
+    }

    if (tctx->discarded_packets < 2) {
        tctx->discarded_packets++;
@@ -764,6 +773,20 @@ av_cold int ff_twinvq_decode_init(AVCodecContext *avctx)
    tctx->avctx       = avctx;
    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;

+    if (!avctx->block_align) {
+        avctx->block_align = tctx->frame_size + 7 >> 3;
+    } else if (avctx->block_align * 8 < tctx->frame_size) {
+        av_log(avctx, AV_LOG_ERROR, "Block align is %d bits, expected %d\n",
+               avctx->block_align * 8, tctx->frame_size);
+        return AVERROR_INVALIDDATA;
+    }
+    tctx->frames_per_packet = avctx->block_align * 8 / tctx->frame_size;
+    if (tctx->frames_per_packet > TWINVQ_MAX_FRAMES_PER_PACKET) {
+        av_log(avctx, AV_LOG_ERROR, "Too many frames per packet (%d)\n",
+               tctx->frames_per_packet);
+        return AVERROR_INVALIDDATA;
+    }
+
    avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
    if ((ret = init_mdct_win(tctx))) {
        av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
--- a/libavcodec/twinvq.h
+++ b/libavcodec/twinvq.h
@@ -58,6 +58,8 @@ enum TwinVQFrameType {
 #define TWINVQ_SUBBLOCKS_MAX     16
 #define TWINVQ_BARK_N_COEF_MAX   4

+#define TWINVQ_MAX_FRAMES_PER_PACKET 2
+
 /**
 * Parameters and tables that are different for each frame type
 */
@@ -162,7 +164,8 @@ typedef struct TwinVQContext {
    // scratch buffers
    float *tmp_buf;

-    TwinVQFrameData bits;
+    int frame_size, frames_per_packet, cur_frame;
+    TwinVQFrameData bits[TWINVQ_MAX_FRAMES_PER_PACKET];

    enum TwinVQCodec codec;

--- a/libavcodec/twinvqdec.c
+++ b/libavcodec/twinvqdec.c
@@ -251,7 +251,7 @@ static void read_cb_data(TwinVQContext *tctx, GetBitContext *gb,
 static int twinvq_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
                                 const uint8_t *buf, int buf_size)
 {
-    TwinVQFrameData     *bits = &tctx->bits;
+    TwinVQFrameData     *bits = &tctx->bits[0];
    const TwinVQModeTab *mtab = tctx->mtab;
    int channels              = tctx->avctx->channels;
    int sub;
@@ -268,7 +268,7 @@ static int twinvq_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
        return AVERROR_INVALIDDATA;
    }

-    bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits.window_type];
+    bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits[0].window_type];

    sub = mtab->fmode[bits->ftype].sub;

@@ -396,13 +396,17 @@ static av_cold int twinvq_decode_init(AVCodecContext *avctx)
        return -1;
    }

-    avctx->block_align = (avctx->bit_rate * tctx->mtab->size
-                                          / avctx->sample_rate + 15) / 8;
-
    tctx->codec          = TWINVQ_CODEC_VQF;
    tctx->read_bitstream = twinvq_read_bitstream;
    tctx->dec_bark_env   = dec_bark_env;
    tctx->decode_ppc     = decode_ppc;
+    tctx->frame_size     = avctx->bit_rate * tctx->mtab->size
+                                           / avctx->sample_rate + 8;
+    if (avctx->block_align && avctx->block_align * 8 / tctx->frame_size > 1) {
+        av_log(avctx, AV_LOG_ERROR,
+               "VQF TwinVQ should have only one frame per packet\n");
+        return AVERROR_INVALIDDATA;
+    }

    return ff_twinvq_decode_init(avctx);
 }