From 1ecc51ee971e0056a53bd6cf9c6f6af18b167e4b Mon Sep 17 00:00:00 2001
From: Loongson Technology Corporation Limited
Date: Mon, 2 Oct 2023 22:42:52 +0800
Subject: [PATCH] loongarch: Init LSX/LASX support

LSX/LASX is the LOONGARCH 128-bit/256-bit SIMD Architecture.

Signed-off-by: Shiyou Yin
Signed-off-by: Xiwei Gu
---
Notes for reviewers (text in this section is not part of the commit message):
 - common/cpu.c: when HAVE_LSX is set, x264_cpu_detect() reads AT_HWCAP
   through getauxval() and maps bit 4 to X264_CPU_LSX and bit 5 to
   X264_CPU_LASX.
 - configure: a single probe assembles an LASX instruction (xvadd.b on
   $xr registers) and, on success, defines HAVE_LSX as the base flag for
   both LSX and LASX.
 - Makefile: the LOONGARCH section adds no sources yet; it only wires the
   per-bit-depth object rules for SRCASM_X.
 - x264.h: X264_CPU_LSX / X264_CPU_LASX use the values 0x1 / 0x2.

 Makefile         | 14 ++++++++++++++
 common/cpu.c     | 22 ++++++++++++++++++++++
 common/osdep.h   |  2 +-
 config.guess     |  3 +++
 config.sub       |  2 ++
 configure        | 17 +++++++++++++++--
 tools/checkasm.c | 13 ++++++++++++-
 x264.h           |  4 ++++
 8 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index cfb72dab..4a6659b0 100644
--- a/Makefile
+++ b/Makefile
@@ -197,6 +197,20 @@ SRCS_X += common/mips/dct-c.c \
 endif
 endif
 
+# LOONGARCH optimization
+ifeq ($(SYS_ARCH),LOONGARCH)
+SRCS_X +=
+
+OBJASM +=
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-8.o)
+endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-10.o)
+endif
+
+endif
+
 endif
 
 ifneq ($(HAVE_GETOPT_LONG),1)
diff --git a/common/cpu.c b/common/cpu.c
index 2c39d7fe..1bdbc6b2 100644
--- a/common/cpu.c
+++ b/common/cpu.c
@@ -98,6 +98,9 @@ const x264_cpu_name_t x264_cpu_names[] =
     {"NEON",            X264_CPU_NEON},
 #elif ARCH_MIPS
     {"MSA",             X264_CPU_MSA},
+#elif ARCH_LOONGARCH
+    {"LSX",             X264_CPU_LSX},
+    {"LASX",            X264_CPU_LASX},
 #endif
     {"", 0},
 };
@@ -431,6 +434,25 @@ uint32_t x264_cpu_detect( void )
     return X264_CPU_MSA;
 }
 
+#elif HAVE_LSX
+#include <sys/auxv.h>
+
+#define LA_HWCAP_LSX    ( 1U << 4 )
+#define LA_HWCAP_LASX   ( 1U << 5 )
+
+uint32_t x264_cpu_detect( void )
+{
+    uint32_t flags = 0;
+    uint32_t hwcap = (uint32_t)getauxval( AT_HWCAP );
+
+    if( hwcap & LA_HWCAP_LSX )
+        flags |= X264_CPU_LSX;
+    if( hwcap & LA_HWCAP_LASX )
+        flags |= X264_CPU_LASX;
+
+    return flags;
+}
+
 #else
 
 uint32_t x264_cpu_detect( void )
diff --git a/common/osdep.h b/common/osdep.h
index 22301212..277224c7 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -314,7 +314,7 @@ static inline int x264_is_regular_file( FILE *filehandle )
 
 #define EXPAND(x) x
 
-#if ARCH_X86 || ARCH_X86_64
+#if ARCH_X86 || ARCH_X86_64 || ARCH_LOONGARCH
 #define NATIVE_ALIGN 64
 #define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
 #define ALIGNED_64( var ) DECLARE_ALIGNED( var, 64 )
diff --git a/config.guess b/config.guess
index 14c12963..7eec710e 100755
--- a/config.guess
+++ b/config.guess
@@ -934,6 +934,9 @@ EOF
     ia64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
+    loongarch32:Linux:*:* | loongarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     m32r*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
diff --git a/config.sub b/config.sub
index 72e9265b..3093784c 100755
--- a/config.sub
+++ b/config.sub
@@ -274,6 +274,7 @@ case $basic_machine in
 	| ip2k | iq2000 \
 	| le32 | le64 \
 	| lm32 \
+	| loongarch32 | loongarch64 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
 	| mips | mipsbe | mipseb | mipsel | mipsle \
@@ -389,6 +390,7 @@ case $basic_machine in
 	| ip2k-* | iq2000-* \
 	| le32-* | le64-* \
 	| lm32-* \
+	| loongarch32-* | loongarch64-* \
 	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
 	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
diff --git a/configure b/configure
index e242e73c..53078ece 100755
--- a/configure
+++ b/configure
@@ -411,7 +411,7 @@ NL="
 # list of all preprocessor HAVE values we can define
 CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
              LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
-             MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
+             MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
 
 # parse options
 
@@ -822,6 +822,12 @@ case $host_cpu in
         AS="${AS-${CC}}"
         AS_EXT=".c"
         ;;
+    loongarch*)
+        ARCH="LOONGARCH"
+        ASFLAGS="$ASFLAGS -c"
+        AS="${AS-${CC}}"
+        AS_EXT=".S"
+        ;;
     aarch64|arm64*)
         ARCH="AARCH64"
         stack_alignment=16
@@ -1024,6 +1030,13 @@ if [ $asm = auto -a $ARCH = MIPS ] ; then
     fi
 fi
 
+if [ $asm = auto -a $ARCH = LOONGARCH ] ; then
+    if cc_check '' '' '__asm__("xvadd.b $xr0, $xr1, $xr2");' ; then
+        # Use HAVE_LSX as the base flag, compiler support LA SIMD(LSX and LASX)
+        define HAVE_LSX
+    fi
+fi
+
 [ $asm = no ] && AS=""
 [ "x$AS" = x ] && asm="no" || asm="yes"
 
@@ -1674,7 +1687,7 @@ cat conftest.log >> config.log
 cat conftest.log
 
 [ "$SRCPATH" != "." ] && ln -sf ${SRCPATH}/Makefile ./Makefile
-mkdir -p common/{aarch64,arm,mips,ppc,x86} encoder extras filters/video input output tools
+mkdir -p common/{aarch64,arm,mips,ppc,x86,loongarch} encoder extras filters/video input output tools
 
 echo
 echo "You can run 'make' or 'make fprofiled' now."
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 20775714..90f76132 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -117,6 +117,9 @@ static inline uint32_t read_time(void)
     a = b;
 #elif ARCH_MIPS
     asm volatile( "rdhwr %0, $2" : "=r"(a) :: "memory" );
+#elif ARCH_LOONGARCH
+    uint32_t id = 0;
+    asm volatile( "rdtimel.w  %0, %1" : "=r"(a), "=r"(id) :: "memory" );
 #endif
     return a;
 }
@@ -215,6 +218,9 @@ static void print_bench(void)
                     b->cpu&X264_CPU_ARMV8 ? "armv8" :
 #elif ARCH_MIPS
                     b->cpu&X264_CPU_MSA ? "msa" :
+#elif ARCH_LOONGARCH
+                    b->cpu&X264_CPU_LASX ? "lasx" :
+                    b->cpu&X264_CPU_LSX ? "lsx" :
 #endif
                     "c",
 #if ARCH_X86 || ARCH_X86_64
@@ -2976,6 +2982,11 @@ static int check_all_flags( void )
 #elif ARCH_MIPS
     if( cpu_detect & X264_CPU_MSA )
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_MSA, "MSA" );
+#elif ARCH_LOONGARCH
+    if( cpu_detect & X264_CPU_LSX )
+        ret |= add_flags( &cpu0, &cpu1, X264_CPU_LSX, "LSX" );
+    if( cpu_detect & X264_CPU_LASX )
+        ret |= add_flags( &cpu0, &cpu1, X264_CPU_LASX, "LASX" );
 #endif
     return ret;
 }
@@ -2989,7 +3000,7 @@ REALIGN_STACK int main( int argc, char **argv )
 
     if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
     {
-#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS
+#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS && !ARCH_LOONGARCH
         fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
         return 1;
 #endif
diff --git a/x264.h b/x264.h
index b8619d4b..10884d7b 100644
--- a/x264.h
+++ b/x264.h
@@ -181,6 +181,10 @@ typedef struct x264_nal_t
 /* MIPS */
 #define X264_CPU_MSA            0x0000001U  /* MIPS MSA */
 
+/* LOONGARCH */
+#define X264_CPU_LSX            0x0000001U  /* LOONGARCH LSX */
+#define X264_CPU_LASX           0x0000002U  /* LOONGARCH LASX */
+
 /* Analyse flags */
 #define X264_ANALYSE_I4x4       0x0001U  /* Analyse i4x4 */
 #define X264_ANALYSE_I8x8       0x0002U  /* Analyse i8x8 (requires 8x8 transform) */