1
mirror of https://github.com/mpv-player/mpv synced 2025-01-01 04:36:24 +01:00

Minor speedup for K6-2, K7, P3

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2836 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
nick 2001-11-11 15:35:11 +00:00
parent b66e9843f8
commit 1ffb6bfa2b
2 changed files with 68 additions and 66 deletions

View File

@ -85,40 +85,41 @@ PROFILE_START();
register int x;
#ifdef ARCH_X86
#ifdef HAVE_MMX
asm volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
::"m"(dstbase),"m"(srca),"m"(src):"memory");
for(x=0;x<w;x+=2){
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"xorl %%eax, %%eax \n\t"
"pcmpeqb %%mm6, %%mm6 \n\t" // F..F
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase
"movq %%mm0, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000
"paddb %%mm6, %%mm2 \n\t"
"punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD
"punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A
"punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B
"pmullw %%mm2, %%mm0 \n\t"
"pmullw %%mm3, %%mm1 \n\t"
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000
"punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD
"punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB
"paddb %%mm2, %%mm0 \n\t"
"movq %%mm0, (%0, %%eax, 4) \n\t"
"addl $2, %%eax \n\t"
"cmpl %3, %%eax \n\t"
" jb 1b \n\t"
:: "r" (dstbase), "r" (srca), "r" (src), "r" (w)
: "%eax"
);
PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t"
"movq %0, %%mm0\n\t" // dstbase
"movq %%mm0, %%mm1\n\t"
"punpcklbw %%mm7, %%mm0\n\t"
"punpckhbw %%mm7, %%mm1\n\t"
"movd %1, %%mm2\n\t" // srca ABCD0000
"paddb %%mm6, %%mm2\n\t"
"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
"movq %%mm2, %%mm3\n\t"
"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
"pmullw %%mm2, %%mm0\n\t"
"pmullw %%mm3, %%mm1\n\t"
"psrlw $8, %%mm0\n\t"
"psrlw $8, %%mm1\n\t"
"packuswb %%mm1, %%mm0\n\t"
"movd %2, %%mm2 \n\t" // src ABCD0000
"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
"paddb %%mm2, %%mm0\n\t"
"movq %%mm0, %0\n\t"
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
}
#else /* 0 HAVE_MMX2*/
for(x=0;x<w;x++){
if(srca[x]){

View File

@ -85,40 +85,41 @@ PROFILE_START();
register int x;
#ifdef ARCH_X86
#ifdef HAVE_MMX
asm volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
::"m"(dstbase),"m"(srca),"m"(src):"memory");
for(x=0;x<w;x+=2){
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"xorl %%eax, %%eax \n\t"
"pcmpeqb %%mm6, %%mm6 \n\t" // F..F
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase
"movq %%mm0, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000
"paddb %%mm6, %%mm2 \n\t"
"punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD
"punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A
"punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B
"pmullw %%mm2, %%mm0 \n\t"
"pmullw %%mm3, %%mm1 \n\t"
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000
"punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD
"punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB
"paddb %%mm2, %%mm0 \n\t"
"movq %%mm0, (%0, %%eax, 4) \n\t"
"addl $2, %%eax \n\t"
"cmpl %3, %%eax \n\t"
" jb 1b \n\t"
:: "r" (dstbase), "r" (srca), "r" (src), "r" (w)
: "%eax"
);
PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t"
"movq %0, %%mm0\n\t" // dstbase
"movq %%mm0, %%mm1\n\t"
"punpcklbw %%mm7, %%mm0\n\t"
"punpckhbw %%mm7, %%mm1\n\t"
"movd %1, %%mm2\n\t" // srca ABCD0000
"paddb %%mm6, %%mm2\n\t"
"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
"movq %%mm2, %%mm3\n\t"
"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
"pmullw %%mm2, %%mm0\n\t"
"pmullw %%mm3, %%mm1\n\t"
"psrlw $8, %%mm0\n\t"
"psrlw $8, %%mm1\n\t"
"packuswb %%mm1, %%mm0\n\t"
"movd %2, %%mm2 \n\t" // src ABCD0000
"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
"paddb %%mm2, %%mm0\n\t"
"movq %%mm0, %0\n\t"
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
}
#else /* 0 HAVE_MMX2*/
for(x=0;x<w;x++){
if(srca[x]){