mpegaudiodec: change imdct window arrangement for better pointer alignment

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
2012-01-04 21:43:47 +01:00 · 2012-01-04 21:43:47 +01:00 · 06677d0dd9
parent 6dfcf53092
commit 06677d0dd9
2 changed files with 22 additions and 13 deletions
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@ -20,6 +20,7 @@
 #define AVCODEC_MPEGAUDIODSP_H

 #include <stdint.h>
+#include "libavutil/common.h"

 typedef struct MPADSPContext {
    void (*apply_window_float)(float *synth_buf, float *window,
@ -74,7 +75,10 @@ void ff_imdct36_blocks_fixed(int *out, int *buf, int *in,
 void ff_init_mpadsp_tabs_float(void);
 void ff_init_mpadsp_tabs_fixed(void);

-extern int ff_mdct_win_fixed[8][36];
-extern float ff_mdct_win_float[8][36];
+/** For SSE implementation, MDCT_BUF_SIZE/2 should be 128-bit aligned */
+#define MDCT_BUF_SIZE FFALIGN(36, 2*4)
+
+extern int ff_mdct_win_fixed[8][MDCT_BUF_SIZE];
+extern float ff_mdct_win_float[8][MDCT_BUF_SIZE];

 #endif /* AVCODEC_MPEGAUDIODSP_H */
--- a/libavcodec/mpegaudiodsp_template.c
+++ b/libavcodec/mpegaudiodsp_template.c
@ -69,8 +69,11 @@ static inline int round_sample(int64_t *sum)
 #   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
 #endif

-/** Window for MDCT. */
-DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][36];
+/** Window for MDCT. Only the elements in [0,17] and
+    [MDCT_BUF_SIZE/2, MDCT_BUF_SIZE/2 + 17] are actually used. The rest
+    is padding to preserve alignment for SIMD implementations.
+*/
+DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][MDCT_BUF_SIZE];

 DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];

@ -244,15 +247,17 @@ void RENAME(ff_init_mpadsp_tabs)(void)

            if (j == 2)
                RENAME(ff_mdct_win)[j][i/3] = FIXHR((d / (1<<5)));
-            else
-                RENAME(ff_mdct_win)[j][i  ] = FIXHR((d / (1<<5)));
+            else {
+                int idx = i < 18 ? i : i + (MDCT_BUF_SIZE/2 - 18);
+                RENAME(ff_mdct_win)[j][idx] = FIXHR((d / (1<<5)));
+            }
        }
    }

    /* NOTE: we do frequency inversion adter the MDCT by changing
        the sign of the right window coefs */
    for (j = 0; j < 4; j++) {
-        for (i = 0; i < 36; i += 2) {
+        for (i = 0; i < MDCT_BUF_SIZE; i += 2) {
            RENAME(ff_mdct_win)[j + 4][i    ] =  RENAME(ff_mdct_win)[j][i    ];
            RENAME(ff_mdct_win)[j + 4][i + 1] = -RENAME(ff_mdct_win)[j][i + 1];
        }
@ -353,15 +358,15 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win)
        t1 = s0 - s1;
        out[(9 + j) * SBLIMIT] = MULH3(t1, win[     9 + j], 1) + buf[4*(9 + j)];
        out[(8 - j) * SBLIMIT] = MULH3(t1, win[     8 - j], 1) + buf[4*(8 - j)];
-        buf[4 * ( 9 + j     )] = MULH3(t0, win[18 + 9 + j], 1);
-        buf[4 * ( 8 - j     )] = MULH3(t0, win[18 + 8 - j], 1);
+        buf[4 * ( 9 + j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1);
+        buf[4 * ( 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1);

        t0 = s2 + s3;
        t1 = s2 - s3;
        out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[     9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
        out[         j  * SBLIMIT] = MULH3(t1, win[             j], 1) + buf[4*(        j)];
-        buf[4 * ( 9 + 8 - j     )] = MULH3(t0, win[18 + 9 + 8 - j], 1);
-        buf[4 * (         j     )] = MULH3(t0, win[18         + j], 1);
+        buf[4 * ( 9 + 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1);
+        buf[4 * (         j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2         + j], 1);
        i += 4;
    }

@ -371,8 +376,8 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win)
    t1 = s0 - s1;
    out[(9 + 4) * SBLIMIT] = MULH3(t1, win[     9 + 4], 1) + buf[4*(9 + 4)];
    out[(8 - 4) * SBLIMIT] = MULH3(t1, win[     8 - 4], 1) + buf[4*(8 - 4)];
-    buf[4 * ( 9 + 4     )] = MULH3(t0, win[18 + 9 + 4], 1);
-    buf[4 * ( 8 - 4     )] = MULH3(t0, win[18 + 8 - 4], 1);
+    buf[4 * ( 9 + 4     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 4], 1);
+    buf[4 * ( 8 - 4     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - 4], 1);
 }

 void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in,