mirror of https://code.videolan.org/videolan/dav1d
loongarch: Improve the performance of the mask and w_mask_420 functions
Relative speedup over C code:

mask_w4_8bpc_c: 9.2 ( 1.00x)
mask_w4_8bpc_lsx: 1.1 ( 8.31x)
mask_w4_8bpc_lasx: 1.2 ( 7.42x)
mask_w8_8bpc_c: 27.4 ( 1.00x)
mask_w8_8bpc_lsx: 2.6 (10.54x)
mask_w8_8bpc_lasx: 1.9 (14.65x)
mask_w16_8bpc_c: 87.2 ( 1.00x)
mask_w16_8bpc_lsx: 8.0 (10.92x)
mask_w16_8bpc_lasx: 6.5 (13.46x)
mask_w32_8bpc_c: 343.4 ( 1.00x)
mask_w32_8bpc_lsx: 31.7 (10.84x)
mask_w32_8bpc_lasx: 22.1 (15.51x)
mask_w64_8bpc_c: 824.9 ( 1.00x)
mask_w64_8bpc_lsx: 78.0 (10.57x)
mask_w64_8bpc_lasx: 54.1 (15.25x)
mask_w128_8bpc_c: 2042.9 ( 1.00x)
mask_w128_8bpc_lsx: 200.7 (10.18x)
mask_w128_8bpc_lasx: 157.1 (13.00x)
w_mask_420_w4_8bpc_c: 19.0 ( 1.00x)
w_mask_420_w4_8bpc_lsx: 1.7 (11.11x)
w_mask_420_w4_8bpc_lasx: 1.2 (15.87x)
w_mask_420_w8_8bpc_c: 58.2 ( 1.00x)
w_mask_420_w8_8bpc_lsx: 4.6 (12.58x)
w_mask_420_w8_8bpc_lasx: 2.5 (23.74x)
w_mask_420_w16_8bpc_c: 188.0 ( 1.00x)
w_mask_420_w16_8bpc_lsx: 11.8 (15.88x)
w_mask_420_w16_8bpc_lasx: 8.3 (22.66x)
w_mask_420_w32_8bpc_c: 742.2 ( 1.00x)
w_mask_420_w32_8bpc_lsx: 47.3 (15.68x)
w_mask_420_w32_8bpc_lasx: 32.7 (22.68x)
w_mask_420_w64_8bpc_c: 1786.3 ( 1.00x)
w_mask_420_w64_8bpc_lsx: 112.4 (15.89x)
w_mask_420_w64_8bpc_lasx: 78.4 (22.78x)
w_mask_420_w128_8bpc_c: 4442.2 ( 1.00x)
w_mask_420_w128_8bpc_lsx: 298.9 (14.86x)
w_mask_420_w128_8bpc_lasx: 220.5 (20.15x)
This commit is contained in:
parent
bde69a94bf
commit
4080673c17
1033
src/loongarch/mc.S
1033
src/loongarch/mc.S
File diff suppressed because it is too large
Load Diff
|
@ -33,12 +33,16 @@
|
|||
#include "src/cpu.h"
|
||||
|
||||
/* Declarations of the LSX variants of w_avg, mask, warp_affine_8x8,
 * warp_affine_8x8t and w_mask_420, emitted through the decl_*_fn macros.
 * NOTE(review): the macro definitions and BF() are not visible in this
 * excerpt; presumably they expand to bitdepth-suffixed prototypes. */
decl_w_avg_fn(BF(dav1d_w_avg, lsx));
|
||||
decl_mask_fn(BF(dav1d_mask, lsx));
|
||||
decl_warp8x8_fn(BF(dav1d_warp_affine_8x8, lsx));
|
||||
decl_warp8x8t_fn(BF(dav1d_warp_affine_8x8t, lsx));
|
||||
decl_w_mask_fn(BF(dav1d_w_mask_420, lsx));
|
||||
|
||||
/* Declarations of the LASX variants of the same five functions that are
 * declared for LSX: w_avg, mask, warp_affine_8x8, warp_affine_8x8t and
 * w_mask_420.
 * NOTE(review): the decl_*_fn macro definitions are not visible in this
 * excerpt. */
decl_w_avg_fn(BF(dav1d_w_avg, lasx));
|
||||
decl_mask_fn(BF(dav1d_mask, lasx));
|
||||
decl_warp8x8_fn(BF(dav1d_warp_affine_8x8, lasx));
|
||||
decl_warp8x8t_fn(BF(dav1d_warp_affine_8x8t, lasx));
|
||||
decl_w_mask_fn(BF(dav1d_w_mask_420, lasx));
|
||||
|
||||
/*
 * Installs LoongArch-specific function pointers into the MC DSP context `c`.
 *
 * The visible assignments sit between `#if BITDEPTH == 8` and `#endif`.
 * LSX variants are assigned first; if the LASX flag is also set, the same
 * five slots (w_avg, mask, warp8x8, warp8x8t, w_mask[2]) are overwritten
 * with the LASX variants.
 *
 * NOTE(review): this is a diff view. The lines between the two hunks,
 * including the definition of `flags`, are elided and not documented here.
 */
static ALWAYS_INLINE void mc_dsp_init_loongarch(Dav1dMCDSPContext *const c) {
|
||||
#if BITDEPTH == 8
|
||||
|
@ -47,14 +51,18 @@ static ALWAYS_INLINE void mc_dsp_init_loongarch(Dav1dMCDSPContext *const c) {
|
|||
/* No LSX flag: leave the context as it was on entry to this point. */
if (!(flags & DAV1D_LOONGARCH_CPU_FLAG_LSX)) return;
|
||||
|
||||
c->w_avg = BF(dav1d_w_avg, lsx);
|
||||
c->mask = BF(dav1d_mask, lsx);
|
||||
c->warp8x8 = BF(dav1d_warp_affine_8x8, lsx);
|
||||
c->warp8x8t = BF(dav1d_warp_affine_8x8t, lsx);
|
||||
/* Slot 2 of the w_mask table receives the w_mask_420 implementation. */
c->w_mask[2] = BF(dav1d_w_mask_420, lsx);
|
||||
|
||||
/* No LASX flag: keep the LSX pointers assigned above. */
if (!(flags & DAV1D_LOONGARCH_CPU_FLAG_LASX)) return;
|
||||
|
||||
/* LASX flag set: replace each LSX pointer assigned above. */
c->w_avg = BF(dav1d_w_avg, lasx);
|
||||
c->mask = BF(dav1d_mask, lasx);
|
||||
c->warp8x8 = BF(dav1d_warp_affine_8x8, lasx);
|
||||
c->warp8x8t = BF(dav1d_warp_affine_8x8t, lasx);
|
||||
c->w_mask[2] = BF(dav1d_w_mask_420, lasx);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue