diff --git a/include/vlc_cpu.h b/include/vlc_cpu.h index 0532e3fe79..437ccbba81 100644 --- a/include/vlc_cpu.h +++ b/include/vlc_cpu.h @@ -32,8 +32,8 @@ VLC_API unsigned vlc_CPU(void); # define HAVE_FPU 1 # define VLC_CPU_MMX 8 # define CPU_CAPABILITY_3DNOW (1<<4) -# define CPU_CAPABILITY_MMXEXT (1<<5) -# define CPU_CAPABILITY_SSE (1<<6) +# define VLC_CPU_MMXEXT 32 +# define VLC_CPU_SSE 64 # define CPU_CAPABILITY_SSE2 (1<<7) # define CPU_CAPABILITY_SSE3 (1<<8) # define CPU_CAPABILITY_SSSE3 (1<<9) @@ -54,11 +54,17 @@ VLC_API unsigned vlc_CPU(void); # endif # if defined (__SSE__) +# define vlc_CPU_MMXEXT() (1) +# define vlc_CPU_SSE() (1) # define VLC_SSE -# elif VLC_GCC_VERSION(4, 4) -# define VLC_SSE __attribute__ ((__target__ ("sse"))) # else -# define VLC_SSE VLC_SSE_is_not_implemented_on_this_compiler +# define vlc_CPU_MMXEXT() ((vlc_CPU() & VLC_CPU_MMXEXT) != 0) +# define vlc_CPU_SSE() ((vlc_CPU() & VLC_CPU_SSE) != 0) +# if VLC_GCC_VERSION(4, 4) +# define VLC_SSE __attribute__ ((__target__ ("sse"))) +# else +# define VLC_SSE VLC_SSE_is_not_implemented_on_this_compiler +# endif # endif # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__) diff --git a/modules/codec/avcodec/avcodec.c b/modules/codec/avcodec/avcodec.c index bcc178d9cd..a7c66780dd 100644 --- a/modules/codec/avcodec/avcodec.c +++ b/modules/codec/avcodec/avcodec.c @@ -334,11 +334,11 @@ static int OpenDecoder( vlc_object_t *p_this ) unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX; - if( !(i_cpu & CPU_CAPABILITY_MMXEXT) ) + if( !vlc_CPU_MMXEXT() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX2; if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; - if( !(i_cpu & CPU_CAPABILITY_SSE) ) + if( !vlc_CPU_SSE() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE; if( !(i_cpu & CPU_CAPABILITY_SSE2) ) p_context->dsp_mask |= AV_CPU_FLAG_SSE2; diff --git a/modules/codec/avcodec/encoder.c b/modules/codec/avcodec/encoder.c index 2fc413381d..a851744357 100644 --- a/modules/codec/avcodec/encoder.c +++ b/modules/codec/avcodec/encoder.c @@ -328,11 +328,11 @@ int OpenEncoder( vlc_object_t *p_this ) unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX; - if( !(i_cpu & CPU_CAPABILITY_MMXEXT) ) + if( !vlc_CPU_MMXEXT() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX2; if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; - if( !(i_cpu & CPU_CAPABILITY_SSE) ) + if( !vlc_CPU_SSE() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE; if( !(i_cpu & CPU_CAPABILITY_SSE2) ) p_context->dsp_mask |= AV_CPU_FLAG_SSE2; diff --git a/modules/codec/libmpeg2.c b/modules/codec/libmpeg2.c index 45b13418c9..deed2831f4 100644 --- a/modules/codec/libmpeg2.c +++ b/modules/codec/libmpeg2.c @@ -197,7 +197,7 @@ static int OpenDecoder( vlc_object_t *p_this ) i_accel |= MPEG2_ACCEL_X86_MMX; if( cpu & CPU_CAPABILITY_3DNOW ) i_accel |= MPEG2_ACCEL_X86_3DNOW; - if( cpu & CPU_CAPABILITY_MMXEXT ) + if( vlc_CPU_MMXEXT() ) i_accel |= MPEG2_ACCEL_X86_MMXEXT; #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __ppc64__ ) if( vlc_CPU_ALTIVEC() ) diff --git a/modules/codec/x264.c b/modules/codec/x264.c index 3d3553176a..64bb8af66a 100644 --- a/modules/codec/x264.c +++ b/modules/codec/x264.c @@ -1260,14 +1260,13 @@ static int Open ( vlc_object_t *p_this ) free( psz_val ); #if defined (__i386__) || defined (__x86_64__) - unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) p_sys->param.cpu &= ~X264_CPU_MMX; - if( !(i_cpu & CPU_CAPABILITY_MMXEXT) ) + if( !vlc_CPU_MMXEXT() ) p_sys->param.cpu &= ~X264_CPU_MMXEXT; - if( !(i_cpu & CPU_CAPABILITY_SSE) ) + if( !vlc_CPU_SSE() ) p_sys->param.cpu &= ~X264_CPU_SSE; - if( !(i_cpu & CPU_CAPABILITY_SSE2) ) + if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) ) p_sys->param.cpu &= ~X264_CPU_SSE2; #endif diff --git a/modules/stream_out/switcher.c b/modules/stream_out/switcher.c index 06c738a37c..5e61c183d9 100644 --- a/modules/stream_out/switcher.c +++ b/modules/stream_out/switcher.c @@ -383,11 +383,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt ) unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX; - if( !(i_cpu & CPU_CAPABILITY_MMXEXT) ) + if( !vlc_CPU_MMXEXT() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX2; if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW; - if( !(i_cpu & CPU_CAPABILITY_SSE) ) + if( !vlc_CPU_SSE() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE; if( !(i_cpu & CPU_CAPABILITY_SSE2) ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2; @@ -804,11 +804,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id ) unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX; - if( !(i_cpu & CPU_CAPABILITY_MMXEXT) ) + if( !vlc_CPU_MMXEXT() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX2; if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW; - if( !(i_cpu & CPU_CAPABILITY_SSE) ) + if( !vlc_CPU_SSE() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE; if( !(i_cpu & CPU_CAPABILITY_SSE2) ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2; diff --git a/modules/video_filter/deinterlace/algo_phosphor.c b/modules/video_filter/deinterlace/algo_phosphor.c index d4e4bed662..b16ce55ce0 100644 --- a/modules/video_filter/deinterlace/algo_phosphor.c +++ b/modules/video_filter/deinterlace/algo_phosphor.c @@ -79,7 +79,9 @@ static void DarkenField( picture_t *p_dst, /* Bitwise ANDing with this clears the i_strength highest bits of each byte */ #ifdef CAN_COMPILE_MMXEXT - unsigned u_cpu = vlc_CPU(); +# ifndef __SSE__ + const unsigned u_cpu = vlc_CPU(); +# endif uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know number of bits) */ #endif @@ -113,7 +115,9 @@ static void DarkenField( picture_t *p_dst, int x = 0; #ifdef CAN_COMPILE_MMXEXT - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif { movq_m2r( i_strength_u64, mm1 ); movq_m2r( remove_high_u64, mm2 ); @@ -128,11 +132,11 @@ static void DarkenField( picture_t *p_dst, } } else - { #endif +#if !defined (CAN_COMPILE_MMXEXT) || !defined (__SSE__) + { for( ; x < w8; x += 8, ++po ) (*po) = ( ((*po) >> i_strength) & remove_high_u64 ); -#ifdef CAN_COMPILE_MMXEXT } #endif @@ -174,7 +178,9 @@ static void DarkenField( picture_t *p_dst, #ifdef CAN_COMPILE_MMXEXT /* See also easy-to-read C version below. */ - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif { static const mmx_t b128 = { .uq = 0x8080808080808080ULL }; movq_m2r( b128, mm5 ); @@ -216,7 +222,9 @@ static void DarkenField( picture_t *p_dst, } /* if process_chroma */ #ifdef CAN_COMPILE_MMXEXT - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif emms(); #endif } diff --git a/modules/video_filter/deinterlace/algo_x.c b/modules/video_filter/deinterlace/algo_x.c index 4724c35255..dd5bcd10e3 100644 --- a/modules/video_filter/deinterlace/algo_x.c +++ b/modules/video_filter/deinterlace/algo_x.c @@ -535,8 +535,8 @@ static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst, void RenderX( picture_t *p_outpic, picture_t *p_pic ) { int i_plane; -#ifdef CAN_COMPILE_MMXEXT - unsigned u_cpu = vlc_CPU(); +#if defined (CAN_COMPILE_MMXEXT) && !defined(__SSE__) + const unsigned u_cpu = vlc_CPU(); #endif /* Copy image and skip lines */ @@ -559,11 +559,15 @@ void RenderX( picture_t *p_outpic, picture_t *p_pic ) uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src]; #ifdef CAN_COMPILE_MMXEXT - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx ); else #endif +#ifndef __SSE__ XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx ); +#endif } /* Last line (C only)*/ @@ -586,7 +590,9 @@ void RenderX( picture_t *p_outpic, picture_t *p_pic ) } #ifdef CAN_COMPILE_MMXEXT - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif emms(); #endif } diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c index 3c6691ef9f..375411d8d6 100644 --- a/modules/video_filter/deinterlace/deinterlace.c +++ b/modules/video_filter/deinterlace/deinterlace.c @@ -641,7 +641,7 @@ int Open( vlc_object_t *p_this ) else #endif #if defined(CAN_COMPILE_MMXEXT) - if( chroma->pixel_size == 1 && (vlc_CPU() & CPU_CAPABILITY_MMXEXT) ) + if( chroma->pixel_size == 1 && vlc_CPU_MMXEXT() ) { p_sys->pf_merge = MergeMMXEXT; p_sys->pf_end_merge = EndMMX; diff --git a/modules/video_filter/deinterlace/helpers.c b/modules/video_filter/deinterlace/helpers.c index 6fdafd29e3..9d2b895925 100644 --- a/modules/video_filter/deinterlace/helpers.c +++ b/modules/video_filter/deinterlace/helpers.c @@ -113,7 +113,7 @@ static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field ) * @param[in] p_pix_c Base pointer to the same block in current picture * @param i_pitch_prev i_pitch of previous picture * @param i_pitch_curr i_pitch of current picture - * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false. + * @param b_mmx (vlc_CPU() & VLC_CPU_MMXEXT) or false. * @param[out] pi_top 1 if top field of the block had motion, 0 if no * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no * @return 1 if the block had motion, 0 if no @@ -388,9 +388,9 @@ int EstimateNumBlocksWithMotion( const picture_t* p_prev, /* We must tell our inline helper whether to use MMX acceleration. */ #ifdef CAN_COMPILE_MMXEXT - bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT ); + const bool b_mmx = vlc_CPU_MMXEXT(); #else - bool b_mmx = false; + const bool b_mmx = false; #endif int i_score = 0; @@ -466,9 +466,11 @@ int CalculateInterlaceScore( const picture_t* p_pic_top, int32_t i_score_c = 0; /* this counts as-is (used for non-MMX parts) */ #ifdef CAN_COMPILE_MMXEXT - unsigned u_cpu = vlc_CPU(); +# ifndef __SSE__ + const unsigned u_cpu = vlc_CPU(); - if( u_cpu & CPU_CAPABILITY_MMXEXT ) + if( u_cpu & VLC_CPU_MMXEXT ) +# endif pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */ #endif @@ -512,7 +514,9 @@ int CalculateInterlaceScore( const picture_t* p_pic_top, # of pixels < (2^32)/255 Note: calculates score * 255 */ - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif { static const mmx_t b0 = { .uq = 0x0000000000000000ULL }; static const mmx_t b128 = { .uq = 0x8080808080808080ULL }; @@ -591,7 +595,9 @@ int CalculateInterlaceScore( const picture_t* p_pic_top, } #ifdef CAN_COMPILE_MMXEXT - if( u_cpu & CPU_CAPABILITY_MMXEXT ) +# ifndef __SSE__ + if( u_cpu & VLC_CPU_MMXEXT ) +# endif { movd_r2m( mm7, i_score_mmx ); emms(); diff --git a/modules/video_filter/gradfun.c b/modules/video_filter/gradfun.c index 98d9860e40..d3d153c84f 100644 --- a/modules/video_filter/gradfun.c +++ b/modules/video_filter/gradfun.c @@ -133,21 +133,24 @@ static int Open(vlc_object_t *object) cfg->thresh = 0.0; cfg->radius = 0; cfg->buf = NULL; - cfg->filter_line = filter_line_c; - cfg->blur_line = blur_line_c; #if HAVE_SSE2 && HAVE_6REGS if (vlc_CPU() & CPU_CAPABILITY_SSE2) cfg->blur_line = blur_line_sse2; + else #endif -#if HAVE_MMX2 - if (vlc_CPU() & CPU_CAPABILITY_MMXEXT) - cfg->filter_line = filter_line_mmx2; -#endif + cfg->blur_line = blur_line_c; #if HAVE_SSSE3 if (vlc_CPU() & CPU_CAPABILITY_SSSE3) cfg->filter_line = filter_line_ssse3; + else #endif +#if HAVE_MMX2 + if (vlc_CPU_MMXEXT()) + cfg->filter_line = filter_line_mmx2; + else +#endif + cfg->filter_line = filter_line_c; filter->p_sys = sys; filter->pf_video_filter = Filter; diff --git a/modules/video_filter/postproc.c b/modules/video_filter/postproc.c index bf46a63c12..5fb51274fd 100644 --- a/modules/video_filter/postproc.c +++ b/modules/video_filter/postproc.c @@ -134,12 +134,11 @@ static int OpenPostproc( vlc_object_t *p_this ) /* Set CPU capabilities */ #if defined(__i386__) || defined(__x86_64__) - unsigned i_cpu = vlc_CPU(); if( vlc_CPU_MMX() ) i_flags |= PP_CPU_CAPS_MMX; - if( i_cpu & CPU_CAPABILITY_MMXEXT ) + if( vlc_CPU_MMXEXT() ) i_flags |= PP_CPU_CAPS_MMX2; - if( i_cpu & CPU_CAPABILITY_3DNOW ) + if( vlc_CPU() & CPU_CAPABILITY_3DNOW ) i_flags |= PP_CPU_CAPS_3DNOW; #elif defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) if( vlc_CPU_ALTIVEC() ) diff --git a/modules/video_filter/swscale.c b/modules/video_filter/swscale.c index df2d560793..9a5bfda4f7 100644 --- a/modules/video_filter/swscale.c +++ b/modules/video_filter/swscale.c @@ -232,14 +232,13 @@ static int GetSwsCpuMask(void) int i_sws_cpu = 0; #if defined(__i386__) || defined(__x86_64__) - const unsigned int i_cpu = vlc_CPU(); if( vlc_CPU_MMX() ) i_sws_cpu |= SWS_CPU_CAPS_MMX; #if (LIBSWSCALE_VERSION_INT >= ((0<<16)+(5<<8)+0)) - if( i_cpu & CPU_CAPABILITY_MMXEXT ) + if( vlc_CPU_MMXEXT() ) i_sws_cpu |= SWS_CPU_CAPS_MMX2; #endif - if( i_cpu & CPU_CAPABILITY_3DNOW ) + if( vlc_CPU() & CPU_CAPABILITY_3DNOW ) i_sws_cpu |= SWS_CPU_CAPS_3DNOW; #elif defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) if( vlc_CPU_ALTIVEC() ) diff --git a/src/misc/cpu.c b/src/misc/cpu.c index 22e34c35ea..3d997bbab0 100644 --- a/src/misc/cpu.c +++ b/src/misc/cpu.c @@ -223,19 +223,14 @@ void vlc_CPU_init (void) # endif i_capabilities |= VLC_CPU_MMX; -# if defined (__SSE__) - i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE; -# else if( i_edx & 0x02000000 ) { - i_capabilities |= CPU_CAPABILITY_MMXEXT; - + i_capabilities |= VLC_CPU_MMXEXT; # ifdef CAN_COMPILE_SSE if (vlc_CPU_check ("SSE", SSE_test)) - i_capabilities |= CPU_CAPABILITY_SSE; + i_capabilities |= VLC_CPU_SSE; # endif } -# endif # if defined (__SSE2__) i_capabilities |= CPU_CAPABILITY_SSE2; @@ -289,9 +284,7 @@ void vlc_CPU_init (void) # endif if( b_amd && ( i_edx & 0x00400000 ) ) - { - i_capabilities |= CPU_CAPABILITY_MMXEXT; - } + i_capabilities |= VLC_CPU_MMXEXT; out: #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \ @@ -353,15 +346,15 @@ void vlc_CPU_dump (vlc_object_t *obj) p += sprintf (p, "%s ", (string) ) if (vlc_CPU_MMX()) p += sprintf (p, "MMX "); - PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!"); - PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT"); - PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE"); + if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT "); + if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");; PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3"); PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A"); + PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!"); #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__) if (vlc_CPU_ALTIVEC()) p += sprintf (p, "AltiVec"); diff --git a/src/posix/linux_cpu.c b/src/posix/linux_cpu.c index 5869d35ed0..f93472e9bb 100644 --- a/src/posix/linux_cpu.c +++ b/src/posix/linux_cpu.c @@ -69,12 +69,10 @@ static void vlc_CPU_init (void) #elif defined (__i386__) || defined (__x86_64__) if (!strcmp (cap, "mmx")) core_caps |= VLC_CPU_MMX; -# ifndef __SSE__ if (!strcmp (cap, "sse")) - core_caps |= CPU_CAPABILITY_SSE | CPU_CAPABILITY_MMXEXT; + core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT; if (!strcmp (cap, "mmxext")) - core_caps |= CPU_CAPABILITY_MMXEXT; -# endif + core_caps |= VLC_CPU_MMXEXT; # ifndef __SSE2__ if (!strcmp (cap, "sse2")) core_caps |= CPU_CAPABILITY_SSE2; @@ -119,9 +117,6 @@ static void vlc_CPU_init (void) /* Always enable capabilities that were forced during compilation */ #if defined (__i386__) || defined (__x86_64__) -# ifdef __SSE__ - all_caps |= CPU_CAPABILITY_SSE | CPU_CAPABILITY_MMXEXT; -# endif # ifdef __SSE2__ all_caps |= CPU_CAPABILITY_SSE2; # endif