mirror of https://code.videolan.org/videolan/x264
loongarch: Init LSX/LASX support
LSX and LASX are the LoongArch 128-bit and 256-bit SIMD extensions, respectively. Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn> Signed-off-by: Xiwei Gu <guxiwei-hf@loongson.cn>
This commit is contained in:
parent
5a9dfddea4
commit
1ecc51ee97
14
Makefile
14
Makefile
|
@ -197,6 +197,20 @@ SRCS_X += common/mips/dct-c.c \
|
|||
endif
|
||||
endif
|
||||
|
||||
# LOONGARCH optimization
|
||||
ifeq ($(SYS_ARCH),LOONGARCH)
|
||||
SRCS_X +=
|
||||
|
||||
OBJASM +=
|
||||
ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
|
||||
OBJASM += $(SRCASM_X:%.S=%-8.o)
|
||||
endif
|
||||
ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
|
||||
OBJASM += $(SRCASM_X:%.S=%-10.o)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifneq ($(HAVE_GETOPT_LONG),1)
|
||||
|
|
22
common/cpu.c
22
common/cpu.c
|
@ -98,6 +98,9 @@ const x264_cpu_name_t x264_cpu_names[] =
|
|||
{"NEON", X264_CPU_NEON},
|
||||
#elif ARCH_MIPS
|
||||
{"MSA", X264_CPU_MSA},
|
||||
#elif ARCH_LOONGARCH
|
||||
{"LSX", X264_CPU_LSX},
|
||||
{"LASX", X264_CPU_LASX},
|
||||
#endif
|
||||
{"", 0},
|
||||
};
|
||||
|
@ -431,6 +434,25 @@ uint32_t x264_cpu_detect( void )
|
|||
return X264_CPU_MSA;
|
||||
}
|
||||
|
||||
#elif HAVE_LSX
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#define LA_HWCAP_LSX ( 1U << 4 )
|
||||
#define LA_HWCAP_LASX ( 1U << 5 )
|
||||
|
||||
uint32_t x264_cpu_detect( void )
|
||||
{
|
||||
uint32_t flags = 0;
|
||||
uint32_t hwcap = (uint32_t)getauxval( AT_HWCAP );
|
||||
|
||||
if( hwcap & LA_HWCAP_LSX )
|
||||
flags |= X264_CPU_LSX;
|
||||
if( hwcap & LA_HWCAP_LASX )
|
||||
flags |= X264_CPU_LASX;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
uint32_t x264_cpu_detect( void )
|
||||
|
|
|
@ -314,7 +314,7 @@ static inline int x264_is_regular_file( FILE *filehandle )
|
|||
|
||||
#define EXPAND(x) x
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if ARCH_X86 || ARCH_X86_64 || ARCH_LOONGARCH
|
||||
#define NATIVE_ALIGN 64
|
||||
#define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
|
||||
#define ALIGNED_64( var ) DECLARE_ALIGNED( var, 64 )
|
||||
|
|
|
@ -934,6 +934,9 @@ EOF
|
|||
ia64:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-gnu
|
||||
exit ;;
|
||||
loongarch32:Linux:*:* | loongarch64:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-gnu
|
||||
exit ;;
|
||||
m32r*:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-gnu
|
||||
exit ;;
|
||||
|
|
|
@ -274,6 +274,7 @@ case $basic_machine in
|
|||
| ip2k | iq2000 \
|
||||
| le32 | le64 \
|
||||
| lm32 \
|
||||
| loongarch32 | loongarch64 \
|
||||
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
||||
| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
|
||||
| mips | mipsbe | mipseb | mipsel | mipsle \
|
||||
|
@ -389,6 +390,7 @@ case $basic_machine in
|
|||
| ip2k-* | iq2000-* \
|
||||
| le32-* | le64-* \
|
||||
| lm32-* \
|
||||
| loongarch32-* | loongarch64-* \
|
||||
| m32c-* | m32r-* | m32rle-* \
|
||||
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
||||
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
|
||||
|
|
|
@ -411,7 +411,7 @@ NL="
|
|||
# list of all preprocessor HAVE values we can define
|
||||
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
|
||||
LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
|
||||
MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
|
||||
MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
|
||||
|
||||
# parse options
|
||||
|
||||
|
@ -822,6 +822,12 @@ case $host_cpu in
|
|||
AS="${AS-${CC}}"
|
||||
AS_EXT=".c"
|
||||
;;
|
||||
loongarch*)
|
||||
ARCH="LOONGARCH"
|
||||
ASFLAGS="$ASFLAGS -c"
|
||||
AS="${AS-${CC}}"
|
||||
AS_EXT=".S"
|
||||
;;
|
||||
aarch64|arm64*)
|
||||
ARCH="AARCH64"
|
||||
stack_alignment=16
|
||||
|
@ -1024,6 +1030,13 @@ if [ $asm = auto -a $ARCH = MIPS ] ; then
|
|||
fi
|
||||
fi
|
||||
|
||||
if [ $asm = auto -a $ARCH = LOONGARCH ] ; then
|
||||
if cc_check '' '' '__asm__("xvadd.b $xr0, $xr1, $xr2");' ; then
|
||||
# Use HAVE_LSX as the base flag: it indicates that the compiler supports LoongArch SIMD (both LSX and LASX)
|
||||
define HAVE_LSX
|
||||
fi
|
||||
fi
|
||||
|
||||
[ $asm = no ] && AS=""
|
||||
[ "x$AS" = x ] && asm="no" || asm="yes"
|
||||
|
||||
|
@ -1674,7 +1687,7 @@ cat conftest.log >> config.log
|
|||
cat conftest.log
|
||||
|
||||
[ "$SRCPATH" != "." ] && ln -sf ${SRCPATH}/Makefile ./Makefile
|
||||
mkdir -p common/{aarch64,arm,mips,ppc,x86} encoder extras filters/video input output tools
|
||||
mkdir -p common/{aarch64,arm,mips,ppc,x86,loongarch} encoder extras filters/video input output tools
|
||||
|
||||
echo
|
||||
echo "You can run 'make' or 'make fprofiled' now."
|
||||
|
|
|
@ -117,6 +117,9 @@ static inline uint32_t read_time(void)
|
|||
a = b;
|
||||
#elif ARCH_MIPS
|
||||
asm volatile( "rdhwr %0, $2" : "=r"(a) :: "memory" );
|
||||
#elif ARCH_LOONGARCH
|
||||
uint32_t id = 0;
|
||||
asm volatile( "rdtimel.w %0, %1" : "=r"(a), "=r"(id) :: "memory" );
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
@ -215,6 +218,9 @@ static void print_bench(void)
|
|||
b->cpu&X264_CPU_ARMV8 ? "armv8" :
|
||||
#elif ARCH_MIPS
|
||||
b->cpu&X264_CPU_MSA ? "msa" :
|
||||
#elif ARCH_LOONGARCH
|
||||
b->cpu&X264_CPU_LASX ? "lasx" :
|
||||
b->cpu&X264_CPU_LSX ? "lsx" :
|
||||
#endif
|
||||
"c",
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
|
@ -2976,6 +2982,11 @@ static int check_all_flags( void )
|
|||
#elif ARCH_MIPS
|
||||
if( cpu_detect & X264_CPU_MSA )
|
||||
ret |= add_flags( &cpu0, &cpu1, X264_CPU_MSA, "MSA" );
|
||||
#elif ARCH_LOONGARCH
|
||||
if( cpu_detect & X264_CPU_LSX )
|
||||
ret |= add_flags( &cpu0, &cpu1, X264_CPU_LSX, "LSX" );
|
||||
if( cpu_detect & X264_CPU_LASX )
|
||||
ret |= add_flags( &cpu0, &cpu1, X264_CPU_LASX, "LASX" );
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
@ -2989,7 +3000,7 @@ REALIGN_STACK int main( int argc, char **argv )
|
|||
|
||||
if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
|
||||
{
|
||||
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS
|
||||
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS && !ARCH_LOONGARCH
|
||||
fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
|
||||
return 1;
|
||||
#endif
|
||||
|
|
4
x264.h
4
x264.h
|
@ -181,6 +181,10 @@ typedef struct x264_nal_t
|
|||
/* MIPS */
|
||||
#define X264_CPU_MSA 0x0000001U /* MIPS MSA */
|
||||
|
||||
/* LOONGARCH */
|
||||
#define X264_CPU_LSX 0x0000001U /* LOONGARCH LSX */
|
||||
#define X264_CPU_LASX 0x0000002U /* LOONGARCH LASX */
|
||||
|
||||
/* Analyse flags */
|
||||
#define X264_ANALYSE_I4x4 0x0001U /* Analyse i4x4 */
|
||||
#define X264_ANALYSE_I8x8 0x0002U /* Analyse i8x8 (requires 8x8 transform) */
|
||||
|
|
Loading…
Reference in New Issue