From 21af69f7876d7708ee59fcb94384ab1d2f7ff14f Mon Sep 17 00:00:00 2001 From: Fabrice Bellard Date: Mon, 30 Jul 2001 23:26:26 +0000 Subject: [PATCH] use block[] in structure to have it aligned on 8 bytes for mmx optimizations - dct_unquantize is always a function pointer - added specialized dct_unquantize_h263 Originally committed as revision 22 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/mpegvideo.c | 133 ++++++++++++++++++++++++----------------- libavcodec/mpegvideo.h | 7 +++ 2 files changed, 85 insertions(+), 55 deletions(-) diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 8f56f0def2..ec4b26bf28 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -24,15 +24,17 @@ #include "dsputil.h" #include "mpegvideo.h" -#include "../config.h" - -#ifdef ARCH_X86 -#include "i386/mpegvideo.c" -#endif -#ifndef DCT_UNQUANTIZE -#define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) -#endif - +static void encode_picture(MpegEncContext *s, int picture_number); +static void rate_control_init(MpegEncContext *s); +static int rate_estimate_qscale(MpegEncContext *s); +static void dct_unquantize_mpeg1_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale); +static void dct_unquantize_h263_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale); +static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); +static int dct_quantize_mmx(MpegEncContext *s, + DCTELEM *block, int n, + int qscale); #define EDGE_WIDTH 16 /* enable all paranoid tests for rounding, overflows, etc... */ @@ -59,10 +61,6 @@ static UINT8 h263_chroma_roundtab[16] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, }; -static void encode_picture(MpegEncContext *s, int picture_number); -static void rate_control_init(MpegEncContext *s); -static int rate_estimate_qscale(MpegEncContext *s); - /* default motion estimation */ int motion_estimation_method = ME_LOG; @@ -98,8 +96,13 @@ int MPV_common_init(MpegEncContext *s) int c_size, i; UINT8 *pict; -#if defined ( HAVE_MMX ) && defined ( BIN_PORTABILITY ) - MPV_common_init_mmx(); + if (s->out_format == FMT_H263) + s->dct_unquantize = dct_unquantize_h263_c; + else + s->dct_unquantize = dct_unquantize_mpeg1_c; + +#ifdef HAVE_MMX + MPV_common_init_mmx(s); #endif s->mb_width = (s->width + 15) / 16; s->mb_height = (s->height + 15) / 16; @@ -358,7 +361,6 @@ static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w) } /* generic function for encode/decode called before a frame is coded/decoded */ -#ifndef ARCH_X86 void MPV_frame_start(MpegEncContext *s) { int i; @@ -378,7 +380,7 @@ void MPV_frame_start(MpegEncContext *s) } } } -#endif + /* generic function for encode/decode called after a frame has been coded/decoded */ void MPV_frame_end(MpegEncContext *s) { @@ -461,12 +463,6 @@ static inline int clip(int a, int amin, int amax) return a; } -static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); -static int dct_quantize_mmx(MpegEncContext *s, - DCTELEM *block, int n, - int qscale); -static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); - /* apply one mpeg motion vector to the three components */ static inline void mpeg_motion(MpegEncContext *s, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, @@ -633,7 +629,7 @@ static inline void put_dct(MpegEncContext *s, DCTELEM *block, int i, UINT8 *dest, int line_size) { if (!s->mpeg2) - DCT_UNQUANTIZE(s, block, i, s->qscale); + s->dct_unquantize(s, block, i, s->qscale); j_rev_dct (block); put_pixels_clamped(block, dest, line_size); } @@ -644,7 +640,7 @@ static inline void add_dct(MpegEncContext *s, { if (s->block_last_index[i] >= 0) { if (!s->mpeg2) - DCT_UNQUANTIZE(s, block, i, s->qscale); + s->dct_unquantize(s, block, i, s->qscale); j_rev_dct (block); add_pixels_clamped(block, dest, line_size); } @@ -740,7 +736,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) UINT8 *mbskip_ptr; /* avoid copy if macroblock skipped in last frame too */ - if (!s->encoding) { + if (!s->encoding && s->pict_type != B_TYPE) { mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; if (s->mb_skiped) { s->mb_skiped = 0; @@ -810,7 +806,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) { int mb_x, mb_y, wrap; UINT8 *ptr; - DCTELEM block[6][64]; int i, motion_x, motion_y; s->picture_number = picture_number; @@ -879,17 +874,17 @@ static void encode_picture(MpegEncContext *s, int picture_number) /* get the pixels */ wrap = s->linesize; ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; - get_pixels(block[0], ptr, wrap); - get_pixels(block[1], ptr + 8, wrap); - get_pixels(block[2], ptr + 8 * wrap, wrap); - get_pixels(block[3], ptr + 8 * wrap + 8, wrap); + get_pixels(s->block[0], ptr, wrap); + get_pixels(s->block[1], ptr + 8, wrap); + get_pixels(s->block[2], ptr + 8 * wrap, wrap); + get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); wrap = s->linesize >> 1; ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(block[4], ptr, wrap); + get_pixels(s->block[4], ptr, wrap); wrap = s->linesize >> 1; ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(block[5], ptr, wrap); + get_pixels(s->block[5], ptr, wrap); /* subtract previous frame if non intra */ if (!s->mb_intra) { @@ -900,10 +895,10 @@ static void encode_picture(MpegEncContext *s, int picture_number) ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + (mb_x * 16 + (motion_x >> 1)); - sub_pixels_2(block[0], ptr, s->linesize, dxy); - sub_pixels_2(block[1], ptr + 8, s->linesize, dxy); - sub_pixels_2(block[2], ptr + s->linesize * 8, s->linesize, dxy); - sub_pixels_2(block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); + sub_pixels_2(s->block[0], ptr, s->linesize, dxy); + sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy); + sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy); + sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); if (s->out_format == FMT_H263) { /* special rounding for h263 */ @@ -923,9 +918,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) } offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); ptr = s->last_picture[1] + offset; - sub_pixels_2(block[4], ptr, s->linesize >> 1, dxy); + sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); ptr = s->last_picture[2] + offset; - sub_pixels_2(block[5], ptr, s->linesize >> 1, dxy); + sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); } emms_c(); @@ -943,25 +938,25 @@ static void encode_picture(MpegEncContext *s, int picture_number) for(i=0;i<6;i++) { int last_index; if (av_fdct == jpeg_fdct_ifast) - last_index = dct_quantize(s, block[i], i, s->qscale); + last_index = dct_quantize(s, s->block[i], i, s->qscale); else - last_index = dct_quantize_mmx(s, block[i], i, s->qscale); + last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale); s->block_last_index[i] = last_index; } /* huffman encode */ switch(s->out_format) { case FMT_MPEG1: - mpeg1_encode_mb(s, block, motion_x, motion_y); + mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; case FMT_H263: if (s->h263_msmpeg4) - msmpeg4_encode_mb(s, block, motion_x, motion_y); + msmpeg4_encode_mb(s, s->block, motion_x, motion_y); else - h263_encode_mb(s, block, motion_x, motion_y); + h263_encode_mb(s, s->block, motion_x, motion_y); break; case FMT_MJPEG: - mjpeg_encode_mb(s, block); + mjpeg_encode_mb(s, s->block); break; } @@ -969,7 +964,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][0][0] = motion_x; s->mv[0][0][1] = motion_y; - MPV_decode_mb(s, block); + MPV_decode_mb(s, s->block); } } } @@ -1121,9 +1116,8 @@ static int dct_quantize_mmx(MpegEncContext *s, return last_non_zero; } -#ifndef HAVE_DCT_UNQUANTIZE -static void dct_unquantize(MpegEncContext *s, - DCTELEM *block, int n, int qscale) +static void dct_unquantize_mpeg1_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale) { int i, level; const UINT16 *quant_matrix; @@ -1133,10 +1127,6 @@ static void dct_unquantize(MpegEncContext *s, block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; - if (s->out_format == FMT_H263) { - i = 1; - goto unquant_even; - } /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; for(i=1;i<64;i++) { @@ -1160,7 +1150,6 @@ static void dct_unquantize(MpegEncContext *s, } } else { i = 0; - unquant_even: quant_matrix = s->non_intra_matrix; for(;i<64;i++) { level = block[i]; @@ -1185,7 +1174,41 @@ static void dct_unquantize(MpegEncContext *s, } } } -#endif + +static void dct_unquantize_h263_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level, qmul, qadd; + + if (s->mb_intra) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + i = 1; + } else { + i = 0; + } + + qmul = s->qscale << 1; + qadd = (s->qscale - 1) | 1; + + for(;i<64;i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; + } +#ifdef PARANOID + if (level < -2048 || level > 2047) + fprintf(stderr, "unquant error %d %d\n", i, level); +#endif + block[i] = level; + } + } +} /* rate control */ diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index a097b01b00..71f8b139cf 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -173,6 +173,10 @@ typedef struct MpegEncContext { int interlaced_dct; int last_qscale; int first_slice; + + DCTELEM block[6][64] __align8; + void (*dct_unquantize)(struct MpegEncContext *s, + DCTELEM *block, int n, int qscale); } MpegEncContext; extern const UINT8 zigzag_direct[64]; @@ -182,6 +186,9 @@ void MPV_common_end(MpegEncContext *s); void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); void MPV_frame_start(MpegEncContext *s); void MPV_frame_end(MpegEncContext *s); +#ifdef HAVE_MMX +void MPV_common_init_mmx(MpegEncContext *s); +#endif /* motion_est.c */