diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 3810853362..dd2c499b9d 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -1374,6 +1374,115 @@ PRED8x8L_VERTICAL ssse3
 ;void pred8x8l_down_left(uint8_t *src, int has_topleft, int has_topright, int stride)
 ;-----------------------------------------------------------------------------
 
+INIT_MMX                                    ; macro defined outside this patch; presumably selects the mm register set - confirm
+%define PALIGNR PALIGNR_MMX                 ; PALIGNR below expands to PALIGNR_MMX (not shown here); byte layouts noted below assume palignr-like behaviour (concatenate dst:src, shift right by N bytes) - confirm
+cglobal pred8x8l_down_left_mmxext, 4,5      ; MMX/MMXEXT routine that writes 8 rows of 8 bytes at src; assuming the usual cglobal mapping r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch
+    sub          r0, r3                     ; r0 = src - stride: the row directly above the block
+    movq        mm0, [r0-8]                 ; 8 bytes left of the top row; byte 7 is the top-left neighbour (tl)
+    movq        mm3, [r0]                   ; top row t0..t7
+    movq        mm1, [r0+8]                 ; 8 bytes right of the top row, read unconditionally; only its first byte survives the PALIGNR below
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0            ; mm2 = tl,t0..t6  (left neighbour of every top pixel)
+    PALIGNR     mm1, mm4, 1, mm4            ; mm1 = t1..t7,tr0 (right neighbour of every top pixel)
+    test         r1, r1
+    jz .fix_lt_2                            ; has_topleft == 0: patch byte 0 of mm2
+    test         r2, r2
+    jz .fix_tr_1                            ; has_topright == 0: patch byte 7 of mm1
+    jmp .do_top
+.fix_lt_2:                                  ; xor trick: overwrite byte 0 of mm2 with byte 0 of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56                     ; shift pair keeps only byte 0 of (mm3 ^ mm2)
+    pxor        mm2, mm5                    ; byte 0 of mm2 now equals byte 0 of mm3 (t0); other bytes unchanged
+    test         r2, r2
+    jnz .do_top                             ; has_topright != 0: nothing else to patch; otherwise fall through
+.fix_tr_1:                                  ; same trick on the other end: overwrite byte 7 of mm1 with byte 7 of mm3
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56                     ; shift pair keeps only byte 7 of (mm3 ^ mm1)
+    pxor        mm1, mm5                    ; byte 7 of mm1 now equals byte 7 of mm3 (t7); other bytes unchanged
+    jmp .do_top
+.fix_tr_2:                                  ; reached from .do_top when has_topright == 0: build mm1 from mm3 instead of loading [r0+8]
+    punpckhbw   mm3, mm3                    ; bytes 4..7 of mm3, each duplicated
+    pshufw      mm1, mm3, 0xFF              ; broadcast the highest word: mm1 = 8 copies of what was byte 7 of mm3
+    jmp .do_topright
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; macro defined outside this patch; presumably mm4 = 3-tap lowpass with left=mm2, right=mm1, centre=mm3, tmp=mm5 - confirm operand roles
+    movq        mm7, mm4                    ; keep the filtered top row in mm7
+    test         r2, r2
+    jz .fix_tr_2
+    movq        mm0, [r0+8]                 ; top-right row tr0..tr7
+    movq        mm5, mm0
+    movq        mm2, mm0
+    movq        mm4, mm0
+    psrlq       mm5, 56                     ; mm5 = tr7 in byte 0, zero elsewhere
+    PALIGNR     mm2, mm3, 7, mm3            ; mm2 = t7,tr0..tr6, provided the lowpass macro left mm3 (top row) intact - confirm
+    PALIGNR     mm5, mm4, 1, mm4            ; mm5 = tr1..tr7,tr7 (last pixel repeated as its own right neighbour)
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = filtered top-right row (same macro as above)
+.do_topright:                               ; on entry: mm7 = filtered top row, mm1 = top-right row (filtered, or the broadcast from .fix_tr_2)
+    lea          r1, [r0+r3*2]              ; r1 = r0 + 2*stride
+    movq        mm6, mm1
+    psrlq       mm1, 56                     ; last top-right byte moved down to byte 0
+    movq        mm4, mm1
+    lea          r2, [r1+r3*2]              ; r2 = r0 + 4*stride
+    movq        mm2, mm6
+    PALIGNR     mm2, mm7, 1, mm0            ; mm2 = bytes 1..7 of mm7 followed by byte 0 of mm6
+    movq        mm3, mm6
+    PALIGNR     mm3, mm7, 7, mm0            ; mm3 = byte 7 of mm7 followed by bytes 0..6 of mm6
+    PALIGNR     mm4, mm6, 1, mm0            ; mm4 = bytes 1..7 of mm6 followed by its last byte again
+    movq        mm5, mm7
+    movq        mm1, mm7
+    movq        mm7, mm6
+    lea          r4, [r2+r3*2]              ; r4 = r0 + 6*stride
+    psllq       mm1, 8                      ; mm1 = 0 followed by bytes 0..6 of the filtered top row
+    PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6 ; second pass centred on the filtered top row -> mm0
+    PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6 ; second pass centred on the top-right row -> mm1
+    movq  [r4+r3*2], mm1                    ; row 7 (r0 + 8*stride == src + 7*stride)
+    movq        mm2, mm0                    ; each later row = previous row shifted up one byte, with the next byte of mm0 inserted at byte 0:
+    psllq       mm1, 8                      ;   make room at byte 0
+    psrlq       mm2, 56                     ;   isolate the current top byte of mm0
+    psllq       mm0, 8                      ;   advance mm0 so its next byte is on top for the following row
+    por         mm1, mm2                    ;   merge the isolated byte into byte 0
+    movq  [r4+r3*1], mm1                    ; row 6
+    movq        mm2, mm0
+    psllq       mm1, 8
+    psrlq       mm2, 56
+    psllq       mm0, 8
+    por         mm1, mm2
+    movq  [r2+r3*2], mm1                    ; row 5
+    movq        mm2, mm0
+    psllq       mm1, 8
+    psrlq       mm2, 56
+    psllq       mm0, 8
+    por         mm1, mm2
+    movq  [r2+r3*1], mm1                    ; row 4
+    movq        mm2, mm0
+    psllq       mm1, 8
+    psrlq       mm2, 56
+    psllq       mm0, 8
+    por         mm1, mm2
+    movq  [r1+r3*2], mm1                    ; row 3
+    movq        mm2, mm0
+    psllq       mm1, 8
+    psrlq       mm2, 56
+    psllq       mm0, 8
+    por         mm1, mm2
+    movq  [r1+r3*1], mm1                    ; row 2
+    movq        mm2, mm0
+    psllq       mm1, 8
+    psrlq       mm2, 56
+    psllq       mm0, 8
+    por         mm1, mm2
+    movq  [r0+r3*2], mm1                    ; row 1
+    psllq       mm1, 8                      ; last step: mm0 is not needed afterwards, so it is shifted in place without a copy
+    psrlq       mm0, 56
+    por         mm1, mm0
+    movq  [r0+r3*1], mm1                    ; row 0 (r0 + stride == src)
+    RET
+
 %macro PRED8x8L_DOWN_LEFT 1
 cglobal pred8x8l_down_left_%1, 4,4
     sub          r0, r3
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 0a45d4f3ff..e93fb917cc 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -67,6 +67,7 @@ void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topri
 void ff_pred8x8l_horizontal_ssse3   (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_vertical_mmxext    (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_vertical_ssse3     (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_down_left_mmxext   (uint8_t *src, int has_topleft, int has_topright, int stride); /* routine added to h264_intrapred.asm by this patch (assumes cglobal supplies the ff_ prefix) */
 void ff_pred8x8l_down_left_sse2     (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_down_left_ssse3    (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_down_right_mmxext  (uint8_t *src, int has_topleft, int has_topright, int stride);
@@ -132,6 +133,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext;
         h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_mmxext;
         h->pred8x8l [HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_mmxext;
+        h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_mmxext; /* install the new mmxext routine in the DIAG_DOWN_LEFT_PRED slot */
         h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_mmxext;
         h->pred4x4  [DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_mmxext;
         h->pred4x4  [VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_mmxext;