Rip out 3DNOW support

Ancient AMD-specific enhancement to the MMX instruction set. Officially
discontinued by AMD.

Note that support for this was already disabled in the previous commit.
This commit removes the actual code.
This commit is contained in:
wm4 2012-07-29 17:31:38 +02:00
parent 74df1d8e05
commit 43da1e78c4
13 changed files with 6 additions and 270 deletions

1
configure vendored
View File

@ -4695,7 +4695,6 @@ $def_ebx_available
$(ff_config_enable "$arch_all" "$arch" "ARCH")
$(ff_config_enable "$subarch_all" "$subarch" "ARCH")
#define HAVE_AMD3DNOW 0
#define HAVE_MMX ARCH_X86
#define HAVE_MMX2 ARCH_X86
#define HAVE_SSE ARCH_X86

View File

@ -35,8 +35,6 @@ typedef struct cpucaps_s {
bool isX86;
bool hasMMX;
bool hasMMX2;
bool has3DNow;
bool has3DNowExt;
bool hasSSE;
bool hasSSE2;
bool hasSSE3;

View File

@ -189,10 +189,6 @@ static int init(sh_audio_t *sh_audio)
#endif
if(gCpuCaps.hasMMX) a52_accel|=MM_ACCEL_X86_MMX;
if(gCpuCaps.hasMMX2) a52_accel|=MM_ACCEL_X86_MMXEXT;
if(gCpuCaps.has3DNow) a52_accel|=MM_ACCEL_X86_3DNOW;
#ifdef MM_ACCEL_X86_3DNOWEXT
if(gCpuCaps.has3DNowExt) a52_accel|=MM_ACCEL_X86_3DNOWEXT;
#endif
a52_state=a52_init (a52_accel);
if (a52_state == NULL) {
mp_msg(MSGT_DECAUDIO,MSGL_ERR,"A52 init failed\n");

View File

@ -455,9 +455,7 @@ void *decode_video(sh_video_t *sh_video, struct demux_packet *packet,
#if HAVE_MMX
// some codecs are broken, and doesn't restore MMX state :(
// it happens usually with broken/damaged files.
if (gCpuCaps.has3DNow) {
__asm__ volatile("femms\n\t":::"memory");
} else if (gCpuCaps.hasMMX) {
if (gCpuCaps.hasMMX) {
__asm__ volatile("emms\n\t":::"memory");
}
#endif

View File

@ -21,8 +21,6 @@
#define PULLUP_CPU_MMX 1
#define PULLUP_CPU_MMX2 2
#define PULLUP_CPU_3DNOW 4
#define PULLUP_CPU_3DNOWEXT 8
#define PULLUP_CPU_SSE 16
#define PULLUP_CPU_SSE2 32

View File

@ -445,23 +445,6 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
); \
} while (--lines);
static inline struct metrics
block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
int lines, struct vf_priv_s *p, struct frame_stats *s)
{
struct metrics tm;
#if !HAVE_AMD3DNOW
mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
#else
static const unsigned long long ones = 0x0101010101010101ull;
BLOCK_METRICS_TEMPLATE();
__asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
get_block_stats(&tm, p, s);
#endif
return tm;
}
#undef PSUMBW
#undef PSADBW
#undef PMAXUB
@ -797,9 +780,6 @@ static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
if (p->mmx2 == 1) {
for (i = 0; i < w; i += 8)
block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
} else if (p->mmx2 == 2) {
for (i = 0; i < w; i += 8)
block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
} else if (p->fast > 3) {
for (i = 0; i < w; i += 8)
block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
@ -1426,7 +1406,7 @@ static int vf_open(vf_instance_t *vf, char *args)
p->dint_thres = 4;
p->luma_only = 0;
p->fast = 3;
p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
p->mmx2 = gCpuCaps.hasMMX2;
if (args) {
const char *args_remain = parse_args(p, args);
if (args_remain) {
@ -1444,9 +1424,6 @@ static int vf_open(vf_instance_t *vf, char *args)
p->mmx2 = 0;
#if !HAVE_MMX
p->mmx2 = 0;
#endif
#if !HAVE_AMD3DNOW
p->mmx2 &= 1;
#endif
p->thres.odd = p->thres.even;
p->thres.temp = p->thres.noise;

View File

@ -49,8 +49,7 @@ static int config(struct vf_instance *vf,
unsigned int voflags, unsigned int outfmt){
int flags=
(gCpuCaps.hasMMX ? PP_CPU_CAPS_MMX : 0)
| (gCpuCaps.hasMMX2 ? PP_CPU_CAPS_MMX2 : 0)
| (gCpuCaps.has3DNow ? PP_CPU_CAPS_3DNOW : 0);
| (gCpuCaps.hasMMX2 ? PP_CPU_CAPS_MMX2 : 0);
switch(outfmt){
case IMGFMT_444P: flags|= PP_FORMAT_444; break;

View File

@ -66,8 +66,6 @@ static void init_pullup(struct vf_instance *vf, mp_image_t *mpi)
if (gCpuCaps.hasMMX) c->cpu |= PULLUP_CPU_MMX;
if (gCpuCaps.hasMMX2) c->cpu |= PULLUP_CPU_MMX2;
if (gCpuCaps.has3DNow) c->cpu |= PULLUP_CPU_3DNOW;
if (gCpuCaps.has3DNowExt) c->cpu |= PULLUP_CPU_3DNOWEXT;
if (gCpuCaps.hasSSE) c->cpu |= PULLUP_CPU_SSE;
if (gCpuCaps.hasSSE2) c->cpu |= PULLUP_CPU_SSE2;

View File

@ -661,8 +661,7 @@ float sws_lum_sharpen= 0.0;
int get_sws_cpuflags(void){
return
(gCpuCaps.hasMMX ? SWS_CPU_CAPS_MMX : 0)
| (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0)
| (gCpuCaps.has3DNow ? SWS_CPU_CAPS_3DNOW : 0);
| (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0);
}
void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)

View File

@ -71,42 +71,6 @@ static void deint(unsigned char *dest, int ds, unsigned char *src, int ss, int w
fast_memcpy(dest, src, w);
}
#if HAVE_AMD3DNOW
static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
int i, j, ssd=ss;
long crap1, crap2;
if (up) {
ssd = -ss;
fast_memcpy(d, s, w);
d += ds;
s += ss;
}
for (i=h-1; i; i--) {
__asm__ volatile(
"1: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
"movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
"pavgusb %%mm0, %%mm1 \n\t"
"add $8, %%"REG_S" \n\t"
"pavgusb %%mm0, %%mm1 \n\t"
"movq %%mm1, (%%"REG_D") \n\t"
"add $8, %%"REG_D" \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t"
: "=S"(crap1), "=D"(crap2)
: "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd)
);
for (j=w-(w&7); j<w; j++)
d[j] = (s[j+ssd] + 3*s[j])>>2;
d += ds;
s += ss;
}
if (!up) fast_memcpy(d, s, w);
__asm__ volatile("emms \n\t" : : : "memory");
}
#endif
#if HAVE_MMX2
static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
@ -498,9 +462,6 @@ static int vf_open(vf_instance_t *vf, char *args)
#endif
#if HAVE_MMX2
if(gCpuCaps.hasMMX2) qpel_li = qpel_li_MMX2;
#endif
#if HAVE_AMD3DNOW
if(gCpuCaps.has3DNow) qpel_li = qpel_li_3DNOW;
#endif
return 1;
}

View File

@ -36,46 +36,29 @@ static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF00
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
#endif
#define CONFIG_RUNTIME_CPUDETECT 1
//Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
//Note: we have C, X86-nommx, MMX, MMX2
//Plain C versions
#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif
#if ARCH_X86
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#if ! ARCH_X86
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _C
#include "osd_template.c"
#endif
@ -87,10 +70,8 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _X86
#include "osd_template.c"
#endif
@ -100,10 +81,8 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "osd_template.c"
#endif
@ -113,37 +92,19 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "osd_template.c"
#endif
//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "osd_template.c"
#endif
#endif /* ARCH_X86 */
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered by speed / fastest first
if(gCpuCaps.hasMMX2)
vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.has3DNow)
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.hasMMX)
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
else
@ -151,29 +112,13 @@ void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, in
#else
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered by speed / fastest first
if(gCpuCaps.hasMMX2)
vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.has3DNow)
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.hasMMX)
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
else
@ -181,29 +126,13 @@ void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, in
#else
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered by speed / fastest first
if(gCpuCaps.hasMMX2)
vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.has3DNow)
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.hasMMX)
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
else
@ -211,29 +140,13 @@ void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, in
#else
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered by speed / fastest first
if(gCpuCaps.hasMMX2)
vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.has3DNow)
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.hasMMX)
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
else
@ -241,29 +154,13 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i
#else
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered by speed / fastest first
if(gCpuCaps.hasMMX2)
vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.has3DNow)
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
else if(gCpuCaps.hasMMX)
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
else
@ -271,19 +168,6 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
#else
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
#ifdef FAST_OSD_TABLE
@ -304,13 +188,10 @@ void vo_draw_alpha_init(void){
//FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
if( mp_msg_test(MSGT_OSD,MSGL_V) )
{
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered per speed fasterst first
if(gCpuCaps.hasMMX2)
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
else if(gCpuCaps.has3DNow)
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
else if(gCpuCaps.hasMMX)
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
else
@ -318,19 +199,6 @@ void vo_draw_alpha_init(void){
#else
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
#elif HAVE_AMD3DNOW
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
#elif HAVE_MMX
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
#elif ARCH_X86
mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
#else
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
}

View File

@ -24,11 +24,7 @@
#undef PREFETCHW
#undef PAVGB
#if HAVE_AMD3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB "pavgusb"
#elif HAVE_MMX2
#if HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB "pavgb"
@ -37,12 +33,7 @@
#define PREFETCHW " # nop"
#endif
#if HAVE_AMD3DNOW
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
int y;
@ -324,12 +315,6 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
dstbase++;
#endif
#if HAVE_MMX
#if HAVE_AMD3DNOW
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
::);
#else /* HAVE_AMD3DNOW */
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm5, %%mm5\n\t" // F..F
@ -337,48 +322,11 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
"psllw $8, %%mm5\n\t" //FF00FF00FF00
"psrlw $8, %%mm4\n\t" //00FF00FF00FF
::);
#endif /* HAVE_AMD3DNOW */
#endif /* HAVE_MMX */
for(y=0;y<h;y++){
register int x;
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
#if HAVE_MMX
#if HAVE_AMD3DNOW
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=2){
if(srca[x] || srca[x+1])
__asm__ volatile(
PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t"
"movq %0, %%mm0\n\t" // dstbase
"movq %%mm0, %%mm1\n\t"
"punpcklbw %%mm7, %%mm0\n\t"
"punpckhbw %%mm7, %%mm1\n\t"
"movd %1, %%mm2\n\t" // srca ABCD0000
"paddb %%mm6, %%mm2\n\t"
"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
"movq %%mm2, %%mm3\n\t"
"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
"pmullw %%mm2, %%mm0\n\t"
"pmullw %%mm3, %%mm1\n\t"
"psrlw $8, %%mm0\n\t"
"psrlw $8, %%mm1\n\t"
"packuswb %%mm1, %%mm0\n\t"
"movd %2, %%mm2 \n\t" // src ABCD0000
"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
"paddb %%mm2, %%mm0\n\t"
"movq %%mm0, %0\n\t"
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
}
#else //this is faster for intels crap
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
@ -430,7 +378,6 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
: "%eax");
}
#endif
#else /* HAVE_MMX */
for(x=0;x<w;x++){
if(srca[x]){

View File

@ -1020,8 +1020,6 @@ static void WINAPI expGetSystemInfo(SYSTEM_INFO* si)
PF[PF_XMMI_INSTRUCTIONS_AVAILABLE] = TRUE;
if (gCpuCaps.hasSSE2)
PF[PF_XMMI64_INSTRUCTIONS_AVAILABLE] = TRUE;
if (gCpuCaps.has3DNow)
PF[PF_AMD3D_INSTRUCTIONS_AVAILABLE] = TRUE;
cachedsi.dwProcessorType = PROCESSOR_INTEL_PENTIUM;
cachedsi.wProcessorLevel = 5;