mirror of https://code.videolan.org/videolan/dav1d
loongarch: Improve the performance of mc_8bpc.mct functions
Relative speedup over C code:
mct_8tap_regular_w4_0_8bpc_c: 4.2 ( 1.00x)
mct_8tap_regular_w4_0_8bpc_lasx: 0.5 ( 9.08x)
mct_8tap_regular_w4_h_8bpc_c: 12.5 ( 1.00x)
mct_8tap_regular_w4_h_8bpc_lasx: 1.6 ( 7.80x)
mct_8tap_regular_w4_hv_8bpc_c: 33.5 ( 1.00x)
mct_8tap_regular_w4_hv_8bpc_lasx: 6.0 ( 5.54x)
mct_8tap_regular_w4_v_8bpc_c: 13.6 ( 1.00x)
mct_8tap_regular_w4_v_8bpc_lasx: 2.2 ( 6.22x)
mct_8tap_regular_w8_0_8bpc_c: 11.3 ( 1.00x)
mct_8tap_regular_w8_0_8bpc_lasx: 0.7 (15.77x)
mct_8tap_regular_w8_h_8bpc_c: 39.1 ( 1.00x)
mct_8tap_regular_w8_h_8bpc_lasx: 4.7 ( 8.30x)
mct_8tap_regular_w8_hv_8bpc_c: 90.9 ( 1.00x)
mct_8tap_regular_w8_hv_8bpc_lasx: 17.2 ( 5.29x)
mct_8tap_regular_w8_v_8bpc_c: 40.5 ( 1.00x)
mct_8tap_regular_w8_v_8bpc_lasx: 6.9 ( 5.86x)
mct_8tap_regular_w16_0_8bpc_c: 34.3 ( 1.00x)
mct_8tap_regular_w16_0_8bpc_lasx: 1.3 (26.32x)
mct_8tap_regular_w16_h_8bpc_c: 128.3 ( 1.00x)
mct_8tap_regular_w16_h_8bpc_lasx: 20.5 ( 6.26x)
mct_8tap_regular_w16_hv_8bpc_c: 273.5 ( 1.00x)
mct_8tap_regular_w16_hv_8bpc_lasx: 54.5 ( 5.02x)
mct_8tap_regular_w16_v_8bpc_c: 129.7 ( 1.00x)
mct_8tap_regular_w16_v_8bpc_lasx: 22.8 ( 5.69x)
mct_8tap_regular_w32_0_8bpc_c: 133.7 ( 1.00x)
mct_8tap_regular_w32_0_8bpc_lasx: 5.4 (24.65x)
mct_8tap_regular_w32_h_8bpc_c: 511.4 ( 1.00x)
mct_8tap_regular_w32_h_8bpc_lasx: 85.1 ( 6.01x)
mct_8tap_regular_w32_hv_8bpc_c: 1018.2 ( 1.00x)
mct_8tap_regular_w32_hv_8bpc_lasx: 210.0 ( 4.85x)
mct_8tap_regular_w32_v_8bpc_c: 513.6 ( 1.00x)
mct_8tap_regular_w32_v_8bpc_lasx: 88.7 ( 5.79x)
mct_8tap_regular_w64_0_8bpc_c: 315.4 ( 1.00x)
mct_8tap_regular_w64_0_8bpc_lasx: 13.2 (23.86x)
mct_8tap_regular_w64_h_8bpc_c: 1236.8 ( 1.00x)
mct_8tap_regular_w64_h_8bpc_lasx: 208.2 ( 5.94x)
mct_8tap_regular_w64_hv_8bpc_c: 2428.0 ( 1.00x)
mct_8tap_regular_w64_hv_8bpc_lasx: 502.7 ( 4.83x)
mct_8tap_regular_w64_v_8bpc_c: 1238.3 ( 1.00x)
mct_8tap_regular_w64_v_8bpc_lasx: 214.0 ( 5.79x)
mct_8tap_regular_w128_0_8bpc_c: 775.3 ( 1.00x)
mct_8tap_regular_w128_0_8bpc_lasx: 32.5 (23.86x)
mct_8tap_regular_w128_h_8bpc_c: 3077.5 ( 1.00x)
mct_8tap_regular_w128_h_8bpc_lasx: 518.6 ( 5.93x)
mct_8tap_regular_w128_hv_8bpc_c: 5987.0 ( 1.00x)
mct_8tap_regular_w128_hv_8bpc_lasx: 1242.4 ( 4.82x)
mct_8tap_regular_w128_v_8bpc_c: 3077.5 ( 1.00x)
mct_8tap_regular_w128_v_8bpc_lasx: 530.3 ( 5.80x)
This commit is contained in:
parent
b34ecaf310
commit
ae8756ed91
1106
src/loongarch/mc.S
1106
src/loongarch/mc.S
File diff suppressed because it is too large
Load Diff
|
@ -61,6 +61,16 @@ decl_warp8x8_fn(BF(dav1d_warp_affine_8x8, lasx));
|
|||
decl_warp8x8t_fn(BF(dav1d_warp_affine_8x8t, lasx));
|
||||
decl_w_mask_fn(BF(dav1d_w_mask_420, lasx));
|
||||
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular_smooth, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular_sharp, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth_regular, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth_sharp, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp_regular, lasx));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp_smooth, lasx));
|
||||
|
||||
static ALWAYS_INLINE void mc_dsp_init_loongarch(Dav1dMCDSPContext *const c) {
|
||||
#if BITDEPTH == 8
|
||||
const unsigned flags = dav1d_get_cpu_flags();
|
||||
|
@ -93,6 +103,15 @@ static ALWAYS_INLINE void mc_dsp_init_loongarch(Dav1dMCDSPContext *const c) {
|
|||
c->warp8x8t = BF(dav1d_warp_affine_8x8t, lasx);
|
||||
c->w_mask[2] = BF(dav1d_w_mask_420, lasx);
|
||||
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, lasx);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, lasx);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue