vp9/x86: iwht4x4 (lossless) mmx.

This commit is contained in:
Ronald S. Bultje 2014-01-20 20:04:48 -05:00
parent d43efa68bd
commit 97474d527f
2 changed files with 46 additions and 0 deletions

View File

@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
itxfm_funcs(16, avx);
itxfm_func(idct, idct, 32, ssse3);
itxfm_func(idct, idct, 32, avx);
itxfm_func(iwht, iwht, 4, mmx);
#undef itxfm_func
#undef itxfm_funcs
@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (EXTERNAL_MMX(cpu_flags)) {
init_fpel(4, 0, 4, put, mmx);
init_fpel(3, 0, 8, put, mmx);
dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
}
if (EXTERNAL_MMXEXT(cpu_flags)) {

View File

@ -151,6 +151,47 @@ SECTION .text
%endrep
%endmacro
;-------------------------------------------------------------------------------------------
; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------
; VP9_IWHT4_1D: one 1-D pass of the 4-point inverse WHT (per the macro name),
; computed word-wise on the four vectors held in m0-m3.
; In:    m0-m3 = the four input vectors
; Out:   m0-m3 = the four transformed vectors
; Clobb: m4, m5 (used as temporaries)
; NOTE(review): SWAP is provided by x86inc (not visible in this chunk). It is
; assumed to only rename mN at assembly time without emitting instructions,
; and a chain "SWAP a, b, c" is assumed to swap a<->b, then b<->c - confirm.
%macro VP9_IWHT4_1D 0
; rotate the names m1/m2/m3 so the butterflies below address the inputs
; in the order they need
SWAP 1, 2, 3
; m0 += m2
paddw m0, m2
; m3 -= m1
psubw m3, m1
; m4 = m0 - m3 (three-operand form; presumably expanded by x86inc to a
; copy + subtract on non-AVX targets - TODO confirm)
psubw m4, m0, m3
; m4 >>= 1 (arithmetic shift); m4 is the shared term of both differences below
psraw m4, 1
; m5 = m4 - m1, then make that result the new m1 (old m1 is left in m5)
psubw m5, m4, m1
SWAP 5, 1
; m4 -= m2, then make that result the new m2 (old m2 is left in m4)
psubw m4, m2
SWAP 4, 2
; m0 -= (new) m1
psubw m0, m1
; m3 += (new) m2
paddw m3, m2
; reverse name rotation of the one at the top of the macro, so the four
; results end up under the names m0-m3
SWAP 3, 2, 1
%endmacro
; MMX version of the 4x4 inverse WHT + add (hooked up by the C init code as
; ff_vp9_iwht_iwht_4x4_add_mmx for the "lossless" itxfm_add slot).
; Named arguments: dst, stride, block, eob. eob is named but never read here.
; NOTE(review): the "3, 3, 0" cglobal operands are assumed to follow the
; x86inc convention (3 args loaded, 3 GPRs used, 0 XMM registers) - confirm.
; Register use: m0-m3 hold the four rows, m4-m6 are scratch / zero.
INIT_MMX mmx
cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
; load the 4x4 block: four rows of 8 bytes each (4 words per row)
mova m0, [blockq+0*8]
mova m1, [blockq+1*8]
mova m2, [blockq+2*8]
mova m3, [blockq+3*8]
; arithmetic right shift of every coefficient by 2 before the first pass
psraw m0, 2
psraw m1, 2
psraw m2, 2
psraw m3, 2
; first 1-D pass, transpose the 4x4 word matrix (m4 = scratch), second pass
VP9_IWHT4_1D
TRANSPOSE4x4W 0, 1, 2, 3, 4
VP9_IWHT4_1D
; m4 = 0; passed to the store macro and reused to clear the block
pxor m4, m4
; VP9_STORE_2X is defined outside this chunk; presumably it adds the two
; given rows to two rows of dst, with m5/m6 as scratch and m4 as zero
; - TODO confirm against its definition
VP9_STORE_2X 0, 1, 5, 6, 4
; advance dst by two rows for the second pair
lea dstq, [dstq+strideq*2]
VP9_STORE_2X 2, 3, 5, 6, 4
; ZERO_BLOCK is defined outside this chunk; presumably it clears the
; coefficient block (row size 8 bytes, 4 rows) using the zero in m4
; - TODO confirm
ZERO_BLOCK blockq, 8, 4, m4
RET
;-------------------------------------------------------------------------------------------
; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------