Change rounding of the horizontal DWT to match the vertical one.

This allows some simplifications and optimizations and should
not have any effect on quality.

Originally committed as revision 10172 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2007-08-21 16:29:40 +00:00
parent 7506d47aa3
commit ce611a27be
6 changed files with 51 additions and 61 deletions

View File

@ -111,8 +111,7 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
i = 0;
asm volatile(
"pcmpeqd %%xmm7, %%xmm7 \n\t"
"psrad $29, %%xmm7 \n\t"
"pslld $1, %%xmm7 \n\t"
::);
for(; i<w_l-7; i+=8){
asm volatile(
@ -157,25 +156,21 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
"movdqu 20(%1), %%xmm6 \n\t"
"paddd (%1), %%xmm2 \n\t"
"paddd 16(%1), %%xmm6 \n\t"
"movdqa %%xmm2, %%xmm0 \n\t"
"movdqa %%xmm6, %%xmm4 \n\t"
"pslld $2, %%xmm2 \n\t"
"pslld $2, %%xmm6 \n\t"
"psubd %%xmm2, %%xmm0 \n\t"
"psubd %%xmm6, %%xmm4 \n\t"
"psrad $1, %%xmm0 \n\t"
"psrad $1, %%xmm4 \n\t"
"movdqu (%0), %%xmm2 \n\t"
"movdqu 16(%0), %%xmm6 \n\t"
"psubd %%xmm0, %%xmm2 \n\t"
"psubd %%xmm4, %%xmm6 \n\t"
"movdqu (%0), %%xmm0 \n\t"
"movdqu 16(%0), %%xmm4 \n\t"
"paddd %%xmm2, %%xmm0 \n\t"
"paddd %%xmm6, %%xmm4 \n\t"
"psrad $1, %%xmm2 \n\t"
"psrad $1, %%xmm6 \n\t"
"paddd %%xmm0, %%xmm2 \n\t"
"paddd %%xmm4, %%xmm6 \n\t"
"movdqa %%xmm2, (%2) \n\t"
"movdqa %%xmm6, 16(%2) \n\t"
:: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
: "memory"
);
}
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
}
{
@ -291,10 +286,9 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
DWTELEM * const ref = b+w2 - 1;
i = 1;
b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
asm volatile(
"pcmpeqd %%mm7, %%mm7 \n\t"
"psrld $29, %%mm7 \n\t"
"pslld $1, %%mm7 \n\t"
::);
for(; i<w_l-3; i+=4){
asm volatile(
@ -333,16 +327,12 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
"movq 12(%1), %%mm6 \n\t"
"paddd (%1), %%mm2 \n\t"
"paddd 8(%1), %%mm6 \n\t"
"pxor %%mm0, %%mm0 \n\t" //note: the 2 xor could be avoided if we would flip the rounding direction
"pxor %%mm4, %%mm4 \n\t"
"psubd %%mm2, %%mm0 \n\t"
"psubd %%mm6, %%mm4 \n\t"
"psrad $1, %%mm0 \n\t"
"psrad $1, %%mm4 \n\t"
"psubd %%mm0, %%mm2 \n\t"
"psubd %%mm4, %%mm6 \n\t"
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm4 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm6, %%mm4 \n\t"
"psrad $1, %%mm2 \n\t"
"psrad $1, %%mm6 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"paddd %%mm4, %%mm6 \n\t"
"movq %%mm2, (%2) \n\t"
@ -351,7 +341,7 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
: "memory"
);
}
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
}
{

View File

@ -775,7 +775,7 @@ static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int
int i;
assert(shift == 4);
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*4*(src) + 4*(ref) + add + 5 + (5<<27))/(5*16) - (1<<23)))
if(mirror_left){
dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
dst += dst_step;
@ -1113,8 +1113,8 @@ static void horizontal_decompose97i(DWTELEM *b, int width){
DWTELEM temp[width];
const int w2= (width+1)>>1;
lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
}
@ -1150,7 +1150,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
#ifdef liftS
b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
#else
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
#endif
}
}
@ -1344,8 +1344,8 @@ void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO-1, W_BS, 0, 1);
lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
}
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){

View File

@ -165,11 +165,11 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
for(; i<w; i++){
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO-1 + 4 * src[i]) >> W_BS);
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
}
if(width&1){
dst[w] = src[w] + ((2 * ref[w] + W_BO-1 + 4 * src[w]) >> W_BS);
dst[w] = src[w] + ((2 * ref[w] + W_BO + 4 * src[w]) >> W_BS);
}
}

View File

@ -141,9 +141,9 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
2654678 ./tests/data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
9078723c943de5d79490f54b99e6ea9e *./tests/data/a-snow.avi
156656 ./tests/data/a-snow.avi
f2932084b52e2ede167c9ba21eae0656 *./tests/data/out.yuv
958d649d09b7361d5f00b5b3fcccbcd2 *./tests/data/a-snow.avi
156606 ./tests/data/a-snow.avi
b19cb7f9134f922326028c6bb44e96de *./tests/data/out.yuv
stddev: 23.14 PSNR:20.83 bytes:7602176
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
3519486 ./tests/data/a-snow53.avi

View File

@ -141,9 +141,9 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
3525804 ./tests/data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
40a6e938ac2bd92ee12cd57925e86454 *./tests/data/a-snow.avi
68758 ./tests/data/a-snow.avi
1e356854142898c7c4aab4bfedadf235 *./tests/data/out.yuv
2cfa1bdb443d04a890208a83fd239461 *./tests/data/a-snow.avi
68872 ./tests/data/a-snow.avi
64a0495b7ab53509d3b791465262795c *./tests/data/out.yuv
stddev: 10.86 PSNR:27.40 bytes:7602176
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
2721980 ./tests/data/a-snow53.avi

View File

@ -2046,51 +2046,51 @@ ret: 0 st:-1 ts:-0.645825 flags:1
ret: 0 st: 0 dts:0.040000 pts:0.040000 pos:9610 size:1075 flags:0
----------------
tests/data/a-snow.avi
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-snow53.avi