loongarch: Init LSX/LASX support

LSX and LASX are the LoongArch 128-bit and 256-bit SIMD architecture extensions, respectively.

Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Xiwei Gu <guxiwei-hf@loongson.cn>
This commit is contained in:
Loongson Technology Corporation Limited 2023-10-02 22:42:52 +08:00 committed by Shiyou Yin
parent 5a9dfddea4
commit 1ecc51ee97
8 changed files with 73 additions and 4 deletions

View File

@ -197,6 +197,20 @@ SRCS_X += common/mips/dct-c.c \
endif
endif
# LOONGARCH optimization
ifeq ($(SYS_ARCH),LOONGARCH)
SRCS_X +=
OBJASM +=
ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
OBJASM += $(SRCASM_X:%.S=%-8.o)
endif
ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
OBJASM += $(SRCASM_X:%.S=%-10.o)
endif
endif
endif
ifneq ($(HAVE_GETOPT_LONG),1)

View File

@ -98,6 +98,9 @@ const x264_cpu_name_t x264_cpu_names[] =
{"NEON", X264_CPU_NEON},
#elif ARCH_MIPS
{"MSA", X264_CPU_MSA},
#elif ARCH_LOONGARCH
{"LSX", X264_CPU_LSX},
{"LASX", X264_CPU_LASX},
#endif
{"", 0},
};
@ -431,6 +434,25 @@ uint32_t x264_cpu_detect( void )
return X264_CPU_MSA;
}
#elif HAVE_LSX
#include <sys/auxv.h>
#define LA_HWCAP_LSX ( 1U << 4 )
#define LA_HWCAP_LASX ( 1U << 5 )
/* Run-time CPU feature detection for LoongArch.
 *
 * Reads the AT_HWCAP entry through getauxval() and translates the
 * LA_HWCAP_LSX / LA_HWCAP_LASX bits into the corresponding
 * X264_CPU_LSX / X264_CPU_LASX flags.
 *
 * Returns the OR of the detected X264_CPU_* flags, or 0 if neither bit
 * is set in the hardware capability word. */
uint32_t x264_cpu_detect( void )
{
    /* Only the low 32 bits of the capability word are inspected. */
    const uint32_t caps = (uint32_t)getauxval( AT_HWCAP );
    uint32_t detected = 0;

    if( caps & LA_HWCAP_LASX )
        detected |= X264_CPU_LASX;
    if( caps & LA_HWCAP_LSX )
        detected |= X264_CPU_LSX;

    return detected;
}
#else
uint32_t x264_cpu_detect( void )

View File

@ -314,7 +314,7 @@ static inline int x264_is_regular_file( FILE *filehandle )
#define EXPAND(x) x
#if ARCH_X86 || ARCH_X86_64
#if ARCH_X86 || ARCH_X86_64 || ARCH_LOONGARCH
#define NATIVE_ALIGN 64
#define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
#define ALIGNED_64( var ) DECLARE_ALIGNED( var, 64 )

3
config.guess vendored
View File

@ -934,6 +934,9 @@ EOF
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
loongarch32:Linux:*:* | loongarch64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;

2
config.sub vendored
View File

@ -274,6 +274,7 @@ case $basic_machine in
| ip2k | iq2000 \
| le32 | le64 \
| lm32 \
| loongarch32 | loongarch64 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
@ -389,6 +390,7 @@ case $basic_machine in
| ip2k-* | iq2000-* \
| le32-* | le64-* \
| lm32-* \
| loongarch32-* | loongarch64-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \

17
configure vendored
View File

@ -411,7 +411,7 @@ NL="
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
# parse options
@ -822,6 +822,12 @@ case $host_cpu in
AS="${AS-${CC}}"
AS_EXT=".c"
;;
loongarch*)
ARCH="LOONGARCH"
ASFLAGS="$ASFLAGS -c"
AS="${AS-${CC}}"
AS_EXT=".S"
;;
aarch64|arm64*)
ARCH="AARCH64"
stack_alignment=16
@ -1024,6 +1030,13 @@ if [ $asm = auto -a $ARCH = MIPS ] ; then
fi
fi
# LoongArch SIMD probe: only runs when asm support is being auto-detected
# and the target architecture was identified as LOONGARCH.
if [ $asm = auto -a $ARCH = LOONGARCH ] ; then
# Ask cc_check whether the compiler accepts an inline-asm SIMD instruction
# operating on the $xr vector registers.
if cc_check '' '' '__asm__("xvadd.b $xr0, $xr1, $xr2");' ; then
# HAVE_LSX serves as the base flag: when it is defined, the compiler
# supports LoongArch SIMD (both LSX and LASX).
define HAVE_LSX
fi
fi
[ $asm = no ] && AS=""
[ "x$AS" = x ] && asm="no" || asm="yes"
@ -1674,7 +1687,7 @@ cat conftest.log >> config.log
cat conftest.log
[ "$SRCPATH" != "." ] && ln -sf ${SRCPATH}/Makefile ./Makefile
mkdir -p common/{aarch64,arm,mips,ppc,x86} encoder extras filters/video input output tools
mkdir -p common/{aarch64,arm,mips,ppc,x86,loongarch} encoder extras filters/video input output tools
echo
echo "You can run 'make' or 'make fprofiled' now."

View File

@ -117,6 +117,9 @@ static inline uint32_t read_time(void)
a = b;
#elif ARCH_MIPS
asm volatile( "rdhwr %0, $2" : "=r"(a) :: "memory" );
#elif ARCH_LOONGARCH
uint32_t id = 0;
asm volatile( "rdtimel.w %0, %1" : "=r"(a), "=r"(id) :: "memory" );
#endif
return a;
}
@ -215,6 +218,9 @@ static void print_bench(void)
b->cpu&X264_CPU_ARMV8 ? "armv8" :
#elif ARCH_MIPS
b->cpu&X264_CPU_MSA ? "msa" :
#elif ARCH_LOONGARCH
b->cpu&X264_CPU_LASX ? "lasx" :
b->cpu&X264_CPU_LSX ? "lsx" :
#endif
"c",
#if ARCH_X86 || ARCH_X86_64
@ -2976,6 +2982,11 @@ static int check_all_flags( void )
#elif ARCH_MIPS
if( cpu_detect & X264_CPU_MSA )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_MSA, "MSA" );
#elif ARCH_LOONGARCH
if( cpu_detect & X264_CPU_LSX )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_LSX, "LSX" );
if( cpu_detect & X264_CPU_LASX )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_LASX, "LASX" );
#endif
return ret;
}
@ -2989,7 +3000,7 @@ REALIGN_STACK int main( int argc, char **argv )
if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
{
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS && !ARCH_LOONGARCH
fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
return 1;
#endif

4
x264.h
View File

@ -181,6 +181,10 @@ typedef struct x264_nal_t
/* MIPS */
#define X264_CPU_MSA 0x0000001U /* MIPS MSA */
/* LOONGARCH */
#define X264_CPU_LSX 0x0000001U /* LOONGARCH LSX */
#define X264_CPU_LASX 0x0000002U /* LOONGARCH LASX */
/* Analyse flags */
#define X264_ANALYSE_I4x4 0x0001U /* Analyse i4x4 */
#define X264_ANALYSE_I8x8 0x0002U /* Analyse i8x8 (requires 8x8 transform) */