mirror of https://code.videolan.org/videolan/x264
loongarch: Improve the performance of sad/sad_x3/sad_x4 series functions
Performance has improved from 4.92fps to 6.32fps.

Tested with the following commands:
  ./configure && make -j5
  ./x264 --threads 4 -o out.mkv yuv_1920x1080.yuv

functions        performance (c)   performance (asm)
sad_4x4          13                3
sad_4x8          26                7
sad_4x16         57                13
sad_8x4          24                3
sad_8x8          54                8
sad_8x16         108               13
sad_16x8         95                8
sad_16x16        189               13
sad_x3_4x4       37                6
sad_x3_4x8       71                13
sad_x3_8x4       70                8
sad_x3_8x8       162               14
sad_x3_8x16      323               25
sad_x3_16x8      279               15
sad_x3_16x16     555               27
sad_x4_4x4       49                8
sad_x4_4x8       95                17
sad_x4_8x4       94                8
sad_x4_8x8       214               16
sad_x4_8x16      429               33
sad_x4_16x8      372               18
sad_x4_16x16     740               34

Signed-off-by: wanglu <wanglu@loongson.cn>
This commit is contained in:
parent
d7d283f634
commit
00b8e3b9cd
1
Makefile
1
Makefile
|
@ -201,6 +201,7 @@ endif
|
|||
ifeq ($(SYS_ARCH),LOONGARCH)
|
||||
ifneq ($(findstring HAVE_LSX 1, $(CONFIG)),)
|
||||
SRCASM_X += common/loongarch/deblock-a.S \
|
||||
common/loongarch/sad-a.S \
|
||||
|
||||
SRCS_X +=
|
||||
|
||||
|
|
|
@ -0,0 +1,335 @@
|
|||
/*****************************************************************************
|
||||
* pixel.h: loongarch pixel metrics
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023 x264 project
|
||||
*
|
||||
* Authors: Lu Wang <wanglu@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_PIXEL_H
|
||||
#define X264_LOONGARCH_PIXEL_H
|
||||
|
||||
/* satd prototypes with the _lsx suffix, one per WxH size: 4x4, 4x8, 4x16, 8x4,
 * 8x8, 8x16, 16x8 and 16x16.  Each takes two pixel pointers, each with its own
 * stride, and returns an int32_t.  The #define in front of every prototype
 * routes the name through x264_template(), which is defined outside this
 * header.
 * NOTE(review): none of these symbols is assigned in the x264_pixel_init
 * changes shown alongside this header - confirm where they are defined. */
#define x264_pixel_satd_4x4_lsx x264_template(pixel_satd_4x4_lsx)
|
||||
int32_t x264_pixel_satd_4x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x8_lsx x264_template(pixel_satd_4x8_lsx)
|
||||
int32_t x264_pixel_satd_4x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x16_lsx x264_template(pixel_satd_4x16_lsx)
|
||||
int32_t x264_pixel_satd_4x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x4_lsx x264_template(pixel_satd_8x4_lsx)
|
||||
int32_t x264_pixel_satd_8x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x8_lsx x264_template(pixel_satd_8x8_lsx)
|
||||
int32_t x264_pixel_satd_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x16_lsx x264_template(pixel_satd_8x16_lsx)
|
||||
int32_t x264_pixel_satd_8x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x8_lsx x264_template(pixel_satd_16x8_lsx)
|
||||
int32_t x264_pixel_satd_16x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x16_lsx x264_template(pixel_satd_16x16_lsx)
|
||||
int32_t x264_pixel_satd_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
/* satd prototypes with the _lasx suffix for sizes 4x8, 4x16, 8x4, 8x8, 8x16,
 * 16x8 and 16x16 (no 4x4 variant is declared with this suffix).  The
 * signature is the same as the _lsx satd prototypes: two pixel pointers, two
 * strides, int32_t result.
 * NOTE(review): not referenced by the visible x264_pixel_init changes -
 * confirm these are implemented before wiring them up. */
#define x264_pixel_satd_4x8_lasx x264_template(pixel_satd_4x8_lasx)
|
||||
int32_t x264_pixel_satd_4x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x16_lasx x264_template(pixel_satd_4x16_lasx)
|
||||
int32_t x264_pixel_satd_4x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x4_lasx x264_template(pixel_satd_8x4_lasx)
|
||||
int32_t x264_pixel_satd_8x4_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x8_lasx x264_template(pixel_satd_8x8_lasx)
|
||||
int32_t x264_pixel_satd_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x16_lasx x264_template(pixel_satd_8x16_lasx)
|
||||
int32_t x264_pixel_satd_8x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x8_lasx x264_template(pixel_satd_16x8_lasx)
|
||||
int32_t x264_pixel_satd_16x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x16_lasx x264_template(pixel_satd_16x16_lasx)
|
||||
int32_t x264_pixel_satd_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
/* sad_x4 prototypes with the _lsx suffix for sizes 16x16, 16x8, 8x16, 8x8,
 * 8x4 and 4x8 (the 4x4 _lsx variant is declared further down, after the _lasx
 * group).  Each takes one p_src pointer, four reference pointers that share a
 * single i_ref_stride, and an output array; nothing is returned.
 * p_sad_array[4] is an ordinary pointer parameter in C - the [4] only
 * documents the expected length.  Presumably one result per reference is
 * stored there; verify against the assembly.
 * x264_pixel_init installs the sad_x4 family with INIT7( sad_x4, _lsx ) when
 * the cpu mask has X264_CPU_LSX. */
#define x264_pixel_sad_x4_16x16_lsx x264_template(pixel_sad_x4_16x16_lsx)
|
||||
void x264_pixel_sad_x4_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_16x8_lsx x264_template(pixel_sad_x4_16x8_lsx)
|
||||
void x264_pixel_sad_x4_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x16_lsx x264_template(pixel_sad_x4_8x16_lsx)
|
||||
void x264_pixel_sad_x4_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x8_lsx x264_template(pixel_sad_x4_8x8_lsx)
|
||||
void x264_pixel_sad_x4_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x4_lsx x264_template(pixel_sad_x4_8x4_lsx)
|
||||
void x264_pixel_sad_x4_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_4x8_lsx x264_template(pixel_sad_x4_4x8_lsx)
|
||||
void x264_pixel_sad_x4_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
|
||||
/* sad_x4 prototypes with the _lasx suffix for sizes 16x16, 16x8, 8x8 and 8x4.
 * The parameter list matches the _lsx sad_x4 prototypes: one source pointer,
 * four reference pointers, one shared reference stride and a 4-entry output
 * array.  x264_pixel_init assigns exactly these four to pixf->sad_x4[] when
 * the cpu mask has X264_CPU_LASX. */
#define x264_pixel_sad_x4_16x16_lasx x264_template(pixel_sad_x4_16x16_lasx)
|
||||
void x264_pixel_sad_x4_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_16x8_lasx x264_template(pixel_sad_x4_16x8_lasx)
|
||||
void x264_pixel_sad_x4_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x8_lasx x264_template(pixel_sad_x4_8x8_lasx)
|
||||
void x264_pixel_sad_x4_8x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x4_lasx x264_template(pixel_sad_x4_8x4_lasx)
|
||||
void x264_pixel_sad_x4_8x4_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
/* 4x4 member of the sad_x4 family.  Note the _lsx suffix: this is not a
 * _lasx function even though it follows the _lasx declarations.  Same
 * signature as the other sad_x4 prototypes (source, four references, shared
 * reference stride, 4-entry output array). */
#define x264_pixel_sad_x4_4x4_lsx x264_template(pixel_sad_x4_4x4_lsx)
|
||||
void x264_pixel_sad_x4_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
|
||||
/* sad_x3 prototypes with the _lsx suffix for sizes 16x16, 16x8, 8x16, 8x8,
 * 8x4, 4x4 and 4x8.  Each takes one p_src pointer, three reference pointers
 * that share a single i_ref_stride, and an output array; nothing is returned.
 * p_sad_array[3] is an ordinary pointer parameter in C - the [3] only
 * documents the expected length.  Presumably one result per reference is
 * stored there; verify against the assembly.
 * x264_pixel_init installs the sad_x3 family with INIT7( sad_x3, _lsx ) when
 * the cpu mask has X264_CPU_LSX. */
#define x264_pixel_sad_x3_16x16_lsx x264_template(pixel_sad_x3_16x16_lsx)
|
||||
void x264_pixel_sad_x3_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_16x8_lsx x264_template(pixel_sad_x3_16x8_lsx)
|
||||
void x264_pixel_sad_x3_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x16_lsx x264_template(pixel_sad_x3_8x16_lsx)
|
||||
void x264_pixel_sad_x3_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x8_lsx x264_template(pixel_sad_x3_8x8_lsx)
|
||||
void x264_pixel_sad_x3_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x4_lsx x264_template(pixel_sad_x3_8x4_lsx)
|
||||
void x264_pixel_sad_x3_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_4x4_lsx x264_template(pixel_sad_x3_4x4_lsx)
|
||||
void x264_pixel_sad_x3_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_4x8_lsx x264_template(pixel_sad_x3_4x8_lsx)
|
||||
void x264_pixel_sad_x3_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
/* sad_x3 prototypes with the _lasx suffix for sizes 16x16 and 16x8 only.
 * Same parameter list as the _lsx sad_x3 prototypes.  x264_pixel_init assigns
 * both to pixf->sad_x3[] when the cpu mask has X264_CPU_LASX. */
#define x264_pixel_sad_x3_16x16_lasx x264_template(pixel_sad_x3_16x16_lasx)
|
||||
void x264_pixel_sad_x3_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_16x8_lasx x264_template(pixel_sad_x3_16x8_lasx)
|
||||
void x264_pixel_sad_x3_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
/* sad prototypes with the _lsx suffix for sizes 16x16, 16x8, 8x16, 8x8, 8x4,
 * 4x16, 4x8 and 4x4.  Each takes a source pointer with its stride and a
 * reference pointer with its stride, and returns an int32_t.
 * x264_pixel_init installs this family with INIT8( sad, _lsx ) and reuses the
 * same functions for sad_aligned via INIT8_NAME( sad_aligned, sad, _lsx )
 * when the cpu mask has X264_CPU_LSX. */
#define x264_pixel_sad_16x16_lsx x264_template(pixel_sad_16x16_lsx)
|
||||
int32_t x264_pixel_sad_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_16x8_lsx x264_template(pixel_sad_16x8_lsx)
|
||||
int32_t x264_pixel_sad_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x16_lsx x264_template(pixel_sad_8x16_lsx)
|
||||
int32_t x264_pixel_sad_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x8_lsx x264_template(pixel_sad_8x8_lsx)
|
||||
int32_t x264_pixel_sad_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x4_lsx x264_template(pixel_sad_8x4_lsx)
|
||||
int32_t x264_pixel_sad_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x16_lsx x264_template(pixel_sad_4x16_lsx)
|
||||
int32_t x264_pixel_sad_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x8_lsx x264_template(pixel_sad_4x8_lsx)
|
||||
int32_t x264_pixel_sad_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x4_lsx x264_template(pixel_sad_4x4_lsx)
|
||||
int32_t x264_pixel_sad_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
/* The only plain sad prototype with the _lasx suffix (8x4).  Same signature
 * as the _lsx sad prototypes.
 * NOTE(review): the X264_CPU_LASX branch of x264_pixel_init shown with this
 * header assigns only sad_x4/sad_x3 entries, not this one - confirm whether
 * it is meant to be installed. */
#define x264_pixel_sad_8x4_lasx x264_template(pixel_sad_8x4_lasx)
|
||||
int32_t x264_pixel_sad_8x4_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
/* hadamard_ac prototypes with the _lsx suffix.  All take one pixel pointer
 * plus its stride and return a uint64_t.  x264_hadamard_ac_8x8_lsx lacks the
 * "pixel_" prefix carried by the 8x8, 8x16, 16x8 and 16x16 entries;
 * presumably it is a shared helper for them - verify against the
 * implementation.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_hadamard_ac_8x8_lsx x264_template(hadamard_ac_8x8_lsx)
|
||||
uint64_t x264_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x8_lsx x264_template(pixel_hadamard_ac_8x8_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x16_lsx x264_template(pixel_hadamard_ac_8x16_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x8_lsx x264_template(pixel_hadamard_ac_16x8_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x16_lsx x264_template(pixel_hadamard_ac_16x16_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
/* hadamard_ac prototypes with the _lasx suffix, mirroring the _lsx set name
 * for name: one entry without the "pixel_" prefix (8x8) and four with it
 * (8x8, 8x16, 16x8, 16x16).  All take a pixel pointer plus stride and return
 * a uint64_t.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_hadamard_ac_8x8_lasx x264_template(hadamard_ac_8x8_lasx)
|
||||
uint64_t x264_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x8_lasx x264_template(pixel_hadamard_ac_8x8_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x16_lasx x264_template(pixel_hadamard_ac_8x16_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x16_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x8_lasx x264_template(pixel_hadamard_ac_16x8_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x16_lasx x264_template(pixel_hadamard_ac_16x16_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x16_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
/* intra_satd_x3 prototypes: 16x16, 8x8c and 4x4 with the _lsx suffix, plus a
 * 16x16 entry with the _lasx suffix.  Each takes p_enc and p_dec pixel
 * pointers (no stride arguments) and a 3-entry int32_t output array, and
 * returns nothing.  The [3] only documents the expected length; the
 * parameter is a plain pointer in C.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_intra_satd_x3_16x16_lsx x264_template(intra_satd_x3_16x16_lsx)
|
||||
void x264_intra_satd_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_8x8c_lsx x264_template(intra_satd_x3_8x8c_lsx)
|
||||
void x264_intra_satd_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_4x4_lsx x264_template(intra_satd_x3_4x4_lsx)
|
||||
void x264_intra_satd_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_16x16_lasx x264_template(intra_satd_x3_16x16_lasx)
|
||||
void x264_intra_satd_x3_16x16_lasx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
/* ssd prototypes with the _lsx suffix for sizes 16x16, 16x8, 8x16, 8x8, 8x4,
 * 4x16, 4x8 and 4x4.  The signature matches the sad prototypes: source
 * pointer and stride, reference pointer and stride, int32_t result.
 * NOTE(review): not referenced by the visible x264_pixel_init changes -
 * confirm where these are defined. */
#define x264_pixel_ssd_16x16_lsx x264_template(pixel_ssd_16x16_lsx)
|
||||
int32_t x264_pixel_ssd_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_16x8_lsx x264_template(pixel_ssd_16x8_lsx)
|
||||
int32_t x264_pixel_ssd_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x16_lsx x264_template(pixel_ssd_8x16_lsx)
|
||||
int32_t x264_pixel_ssd_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x8_lsx x264_template(pixel_ssd_8x8_lsx)
|
||||
int32_t x264_pixel_ssd_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x4_lsx x264_template(pixel_ssd_8x4_lsx)
|
||||
int32_t x264_pixel_ssd_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x16_lsx x264_template(pixel_ssd_4x16_lsx)
|
||||
int32_t x264_pixel_ssd_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x8_lsx x264_template(pixel_ssd_4x8_lsx)
|
||||
int32_t x264_pixel_ssd_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x4_lsx x264_template(pixel_ssd_4x4_lsx)
|
||||
int32_t x264_pixel_ssd_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
/* ssd prototypes with the _lasx suffix for sizes 16x16, 16x8, 8x16 and 8x8.
 * Same signature as the _lsx ssd prototypes.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_pixel_ssd_16x16_lasx x264_template(pixel_ssd_16x16_lasx)
|
||||
int32_t x264_pixel_ssd_16x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_16x8_lasx x264_template(pixel_ssd_16x8_lasx)
|
||||
int32_t x264_pixel_ssd_16x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x16_lasx x264_template(pixel_ssd_8x16_lasx)
|
||||
int32_t x264_pixel_ssd_8x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x8_lasx x264_template(pixel_ssd_8x8_lasx)
|
||||
int32_t x264_pixel_ssd_8x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
/* var2 and var prototypes with the _lsx suffix.
 *  - var2 (8x16, 8x8): two pixel pointers with no stride arguments and a
 *    2-entry int32_t ssd array (a plain pointer in C); returns int32_t.
 *  - var (16x16, 8x16, 8x8): one pixel pointer plus stride; returns uint64_t.
 * NOTE(review): how the uint64_t result is packed is not visible from the
 * prototypes, and none of these are referenced by the visible
 * x264_pixel_init changes. */
#define x264_pixel_var2_8x16_lsx x264_template(pixel_var2_8x16_lsx)
|
||||
int32_t x264_pixel_var2_8x16_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var2_8x8_lsx x264_template(pixel_var2_8x8_lsx)
|
||||
int32_t x264_pixel_var2_8x8_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var_16x16_lsx x264_template(pixel_var_16x16_lsx)
|
||||
uint64_t x264_pixel_var_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_var_8x16_lsx x264_template(pixel_var_8x16_lsx)
|
||||
uint64_t x264_pixel_var_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_var_8x8_lsx x264_template(pixel_var_8x8_lsx)
|
||||
uint64_t x264_pixel_var_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
/* var2 prototypes with the _lasx suffix (8x16 and 8x8).  Same signature as
 * the _lsx var2 prototypes: two pixel pointers, a 2-entry int32_t ssd array,
 * int32_t result.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_pixel_var2_8x16_lasx x264_template(pixel_var2_8x16_lasx)
|
||||
int32_t x264_pixel_var2_8x16_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var2_8x8_lasx x264_template(pixel_var2_8x8_lasx)
|
||||
int32_t x264_pixel_var2_8x8_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
|
||||
/* sa8d prototypes with the _lsx suffix (8x8 and 16x16).  Each takes two
 * pixel pointers, each with its own stride, and returns an int32_t.
 * NOTE(review): not referenced by the visible x264_pixel_init changes. */
#define x264_pixel_sa8d_8x8_lsx x264_template(pixel_sa8d_8x8_lsx)
|
||||
int32_t x264_pixel_sa8d_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_sa8d_16x16_lsx x264_template(pixel_sa8d_16x16_lsx)
|
||||
int32_t x264_pixel_sa8d_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
/* intra_sa8d_x3_8x8 prototypes (_lsx and _lasx) followed by the _lasx sa8d
 * prototypes (8x8 and 16x16).
 *  - intra_sa8d_x3_8x8: p_enc pixel pointer, a p_edge buffer declared as
 *    uint8_t[36], and a 3-entry int32_t output array; returns nothing.  Both
 *    array parameters are plain pointers in C; the bounds only document the
 *    expected sizes.
 *  - sa8d _lasx: same signature as the _lsx sa8d prototypes.
 * NOTE(review): the layout of p_edge is not visible here, and none of these
 * are referenced by the visible x264_pixel_init changes. */
#define x264_intra_sa8d_x3_8x8_lsx x264_template(intra_sa8d_x3_8x8_lsx)
|
||||
void x264_intra_sa8d_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sa8d_x3_8x8_lasx x264_template(intra_sa8d_x3_8x8_lasx)
|
||||
void x264_intra_sa8d_x3_8x8_lasx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sa8d_8x8_lasx x264_template(pixel_sa8d_8x8_lasx)
|
||||
int32_t x264_pixel_sa8d_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_sa8d_16x16_lasx x264_template(pixel_sa8d_16x16_lasx)
|
||||
int32_t x264_pixel_sa8d_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
/* intra_sad_x3 prototypes with the _lsx suffix: 16x16, 8x8, 8x8c and 4x4.
 * The 16x16, 8x8c and 4x4 entries take p_enc and p_dec pixel pointers; the
 * 8x8 entry takes p_enc and a p_edge buffer declared as uint8_t[36] instead.
 * All write into a 3-entry int32_t array and return nothing.
 * NOTE(review): the x264_pixel_init changes shown with this header install
 * only sad, sad_aligned, sad_x3 and sad_x4 - confirm where the intra_sad_x3
 * functions are defined and installed. */
#define x264_intra_sad_x3_16x16_lsx x264_template(intra_sad_x3_16x16_lsx)
|
||||
void x264_intra_sad_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_8x8_lsx x264_template(intra_sad_x3_8x8_lsx)
|
||||
void x264_intra_sad_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_8x8c_lsx x264_template(intra_sad_x3_8x8c_lsx)
|
||||
void x264_intra_sad_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_4x4_lsx x264_template(intra_sad_x3_4x4_lsx)
|
||||
void x264_intra_sad_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -45,7 +45,9 @@
|
|||
#if HAVE_MSA
|
||||
# include "mips/pixel.h"
|
||||
#endif
|
||||
|
||||
#if HAVE_LSX
|
||||
# include "loongarch/pixel.h"
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
* pixel_sad_WxH
|
||||
|
@ -1531,6 +1533,26 @@ void x264_pixel_init( uint32_t cpu, x264_pixel_function_t *pixf )
|
|||
}
|
||||
#endif // HAVE_MSA
|
||||
|
||||
#if HAVE_LSX
|
||||
if( cpu&X264_CPU_LSX )
|
||||
{
|
||||
INIT8( sad, _lsx );
|
||||
INIT8_NAME( sad_aligned, sad, _lsx );
|
||||
INIT7( sad_x3, _lsx );
|
||||
INIT7( sad_x4, _lsx );
|
||||
}
|
||||
|
||||
if( cpu&X264_CPU_LASX )
|
||||
{
|
||||
pixf->sad_x4[PIXEL_16x16] = x264_pixel_sad_x4_16x16_lasx;
|
||||
pixf->sad_x4[PIXEL_16x8] = x264_pixel_sad_x4_16x8_lasx;
|
||||
pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_lasx;
|
||||
pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_lasx;
|
||||
pixf->sad_x3[PIXEL_16x16] = x264_pixel_sad_x3_16x16_lasx;
|
||||
pixf->sad_x3[PIXEL_16x8] = x264_pixel_sad_x3_16x8_lasx;
|
||||
}
|
||||
#endif /* HAVE_LSX */
|
||||
|
||||
#endif // HIGH_BIT_DEPTH
|
||||
#if HAVE_ALTIVEC
|
||||
if( cpu&X264_CPU_ALTIVEC )
|
||||
|
|
Loading…
Reference in New Issue