From 5015984b9c5ed74c5692fd90e0a2e5150be51277 Mon Sep 17 00:00:00 2001 From: Eric Petit Date: Fri, 23 Jan 2004 15:36:23 +0000 Subject: [PATCH] + deinterlace.c : added an Altivec version of Merge() - makes the filter more than 100% faster here (blend mode). Feel free to check if I haven't broken anything, first time doing Altivec ;) --- configure.ac | 4 +- .../video_filter/deinterlace/deinterlace.c | 59 +++++++++++++++++-- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 8f0a8c4534..7f1396f9b3 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Autoconf settings for vlc -dnl $Id: configure.ac,v 1.152 2004/01/22 01:20:39 jlj Exp $ +dnl $Id: configure.ac,v 1.153 2004/01/23 15:36:23 titer Exp $ AC_INIT(vlc,0.7.1-cvs) @@ -995,7 +995,7 @@ AC_CACHE_CHECK([if \$CC groks AltiVec C extensions], CFLAGS="${CFLAGS_save}"]) if test "${ac_cv_c_altivec}" != "no"; then AC_DEFINE(CAN_COMPILE_C_ALTIVEC, 1, Define if your compiler groks C AltiVec extensions.) - AX_ADD_CFLAGS([vlc idctaltivec motionaltivec memcpyaltivec],[${ac_cv_c_altivec}]) + AX_ADD_CFLAGS([vlc idctaltivec motionaltivec memcpyaltivec deinterlace],[${ac_cv_c_altivec}]) ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES}" fi diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c index 35c1e2291b..aa58eb00b5 100644 --- a/modules/video_filter/deinterlace/deinterlace.c +++ b/modules/video_filter/deinterlace/deinterlace.c @@ -2,7 +2,7 @@ * deinterlace.c : deinterlacer plugin for vlc ***************************************************************************** * Copyright (C) 2000, 2001, 2002, 2003 VideoLAN - * $Id: deinterlace.c,v 1.18 2003/12/22 14:32:56 sam Exp $ + * $Id: deinterlace.c,v 1.19 2004/01/23 15:36:23 titer Exp $ * * Author: Sam Hocevar * @@ -55,7 +55,8 @@ static void RenderMean ( vout_thread_t *, picture_t *, picture_t * ); static void RenderBlend ( vout_thread_t *, picture_t *, picture_t * ); static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int ); -static void Merge ( void *, const void *, const void *, size_t ); +static void MergeGeneric ( void *, const void *, const void *, size_t ); +static void MergeAltivec ( void *, const void *, const void *, size_t ); static int SendEvents ( vlc_object_t *, char const *, vlc_value_t, vlc_value_t, void * ); @@ -107,6 +108,8 @@ struct vout_sys_t vout_thread_t *p_vout; vlc_mutex_t filter_lock; + + void (*pf_merge) ( void *, const void *, const void *, size_t ); }; /***************************************************************************** @@ -138,6 +141,15 @@ static int Create( vlc_object_t *p_this ) p_vout->p_sys->last_date = 0; vlc_mutex_init( p_vout, &p_vout->p_sys->filter_lock ); + if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC ) + { + p_vout->p_sys->pf_merge = MergeAltivec; + } + else + { + p_vout->p_sys->pf_merge = MergeGeneric; + } + /* Look what method was requested */ var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING ); var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL ); @@ -617,6 +629,8 @@ static void RenderBob( vout_thread_t *p_vout, } } +#define Merge p_vout->p_sys->pf_merge + /***************************************************************************** * RenderLinear: BOB with linear interpolation *****************************************************************************/ @@ -774,8 +788,10 @@ static void RenderBlend( vout_thread_t *p_vout, } } -static void Merge( void *_p_dest, const void *_p_s1, - const void *_p_s2, size_t i_bytes ) +#undef Merge + +static void MergeGeneric( void *_p_dest, const void *_p_s1, + const void *_p_s2, size_t i_bytes ) { uint8_t* p_dest = (uint8_t*)_p_dest; const uint8_t *p_s1 = (const uint8_t *)_p_s1; @@ -802,6 +818,41 @@ static void Merge( void *_p_dest, const void *_p_s1, } } +static void MergeAltivec( void *_p_dest, const void *_p_s1, + const void *_p_s2, size_t i_bytes ) +{ +#ifdef CAN_COMPILE_C_ALTIVEC + uint8_t *p_dest = (uint8_t*)_p_dest; + const uint8_t *p_s1 = (const uint8_t *)_p_s1; + const uint8_t *p_s2 = (const uint8_t *)_p_s2; + uint8_t *p_end = p_dest + i_bytes - 16; + + if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) | + ( (int)p_dest & 0xF ) ) + { + /* TODO Handle non 16-bytes aligned planes */ + MergeGeneric( _p_dest, _p_s1, _p_s2, i_bytes ); + return; + } + + while( p_dest < p_end ) + { + vec_st( vec_avg( vec_ld( 0, p_s1 ), vec_ld( 0, p_s2 ) ), + 0, p_dest ); + p_s1 += 16; + p_s2 += 16; + p_dest += 16; + } + + p_end += 16; + + while( p_dest < p_end ) + { + *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1; + } +#endif +} + /***************************************************************************** * SendEvents: forward mouse and keyboard events to the parent p_vout *****************************************************************************/