huffyuvdec: implement trick for BGR

Before:
179436 decicycles in rgb, 32735 runs, 33 skips
Stripped object size: 23188

After:
154861 decicycles in rgb, 32738 runs, 30 skips
Stripped object size: 22736

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Christophe Gisquet 2014-06-03 19:51:52 +00:00 committed by Michael Niedermayer
parent 11b4703813
commit deadcf5e71
1 changed files with 73 additions and 12 deletions

View File

@ -720,28 +720,89 @@ static void decode_gray_bitstream(HYuvContext *s, int count)
CLOSE_READER(re, &s->gb);
}
#define GET_VLC_DUAL(dst0, dst1, name, gb, dtable, table1, table2, \
bits, max_depth, rsvd ) \
do { \
unsigned int index = SHOW_UBITS(name, gb, bits); \
int code = dtable[index][0]; \
int n = dtable[index][1]; \
\
if (code != rsvd && n>0) { \
dst0 = code>>8; \
dst1 = code; \
LAST_SKIP_BITS(name, gb, n); \
} else { \
int nb_bits; \
DUAL_INTERN(dst0, table1, gb, name, bits, max_depth); \
\
UPDATE_CACHE(re, gb); \
index = SHOW_UBITS(name, gb, bits); \
DUAL_INTERN(dst1, table2, gb, name, bits, max_depth); \
} \
} while (0)
#define READ_2PIX(dst0, dst1, plane1)\
UPDATE_CACHE(re, &s->gb); \
GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4+plane1].table, \
s->vlc[0].table, s->vlc[plane1].table, \
VLC_BITS, 3, 0xffff)
static av_always_inline void decode_bgr_1(HYuvContext *s, int count,
int decorrelate, int alpha)
{
int i;
OPEN_READER(re, &s->gb);
for (i = 0; i < count; i++) {
int code = get_vlc2(&s->gb, s->vlc[4].table, VLC_BITS, 1);
unsigned int index;
int code, n;
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
code = s->vlc[4].table[index][0];
n = s->vlc[4].table[index][1];
if (code != -1) {
*(uint32_t*)&s->temp[0][4 * i] = s->pix_bgr_map[code];
} else if(decorrelate) {
s->temp[0][4 * i + G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
s->temp[0][4 * i + B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) +
s->temp[0][4 * i + G];
s->temp[0][4 * i + R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) +
s->temp[0][4 * i + G];
LAST_SKIP_BITS(re, &s->gb, n);
} else {
s->temp[0][4 * i + B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
s->temp[0][4 * i + G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
s->temp[0][4 * i + R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
int nb_bits;
if(decorrelate) {
DUAL_INTERN(s->temp[0][4 * i + G], s->vlc[1].table,
&s->gb, re, VLC_BITS, 3);
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
DUAL_INTERN(code, s->vlc[0].table, &s->gb, re, VLC_BITS, 3);
s->temp[0][4 * i + B] = code + s->temp[0][4 * i + G];
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
DUAL_INTERN(code, s->vlc[2].table, &s->gb, re, VLC_BITS, 3);
s->temp[0][4 * i + R] = code + s->temp[0][4 * i + G];
} else {
DUAL_INTERN(s->temp[0][4 * i + B], s->vlc[0].table,
&s->gb, re, VLC_BITS, 3);
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
DUAL_INTERN(s->temp[0][4 * i + G], s->vlc[1].table,
&s->gb, re, VLC_BITS, 3);
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
DUAL_INTERN(s->temp[0][4 * i + R], s->vlc[2].table,
&s->gb, re, VLC_BITS, 3);
}
if (alpha) {
UPDATE_CACHE(re, &s->gb);
index = SHOW_UBITS(re, &s->gb, VLC_BITS);
DUAL_INTERN(s->temp[0][4 * i + A], s->vlc[2].table,
&s->gb, re, VLC_BITS, 3);
}
}
if (alpha)
s->temp[0][4 * i + A] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
}
CLOSE_READER(re, &s->gb);
}
static void decode_bgr_bitstream(HYuvContext *s, int count)