From a34d9ad96974667ef346d192e80ff7e94f1a6434 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 18 Dec 2012 17:02:31 -0800 Subject: [PATCH 1/2] lavc: merge latest x86inc.asm fixes with x264 Unbreak NASM support. Signed-off-by: Luca Barbato --- libavutil/x86/x86inc.asm | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 60d05f45ae..2617cdf273 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -331,7 +331,9 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if stack_size < 0 %assign stack_size -stack_size %endif - %assign xmm_regs_used %2 + %if mmsize != 8 + %assign xmm_regs_used %2 + %endif %if mmsize <= 16 && HAVE_ALIGNED_STACK %assign stack_size_padded stack_size + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1)) %if xmm_regs_used > 6 @@ -339,8 +341,8 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %endif SUB rsp, stack_size_padded %else - %assign reg_num (regs_used - 1) - %xdefine rstk r %+ reg_num + %assign %%reg_num (regs_used - 1) + %xdefine rstk r %+ %%reg_num ; align stack, and save original stack location directly above ; it, i.e. in [rsp+stack_size_padded], so we can restore the ; stack in a single instruction (i.e. mov rsp, rstk or mov @@ -349,6 +351,10 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %assign stack_size_padded stack_size %if xmm_regs_used > 6 %assign stack_size_padded stack_size_padded + (xmm_regs_used - 6) * 16 + %if mmsize == 32 && xmm_regs_used & 1 + ; re-align to 32 bytes + %assign stack_size_padded (stack_size_padded + 16) + %endif %endif %if %1 < 0 ; need to store rsp on stack sub rsp, gprsize+stack_size_padded @@ -411,11 +417,10 @@ DECLARE_REG 14, R15, 120 %macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
%assign num_args %1 %assign regs_used %2 - SETUP_STACK_POINTER %4 ASSERT regs_used >= num_args + SETUP_STACK_POINTER %4 ASSERT regs_used <= 15 PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 - %assign xmm_regs_used 0 ALLOC_STACK %4, %3 %if mmsize != 8 && stack_size == 0 WIN64_SPILL_XMM %3 @@ -499,8 +504,8 @@ DECLARE_REG 14, R15, 72 %macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... %assign num_args %1 %assign regs_used %2 - SETUP_STACK_POINTER %4 ASSERT regs_used >= num_args + SETUP_STACK_POINTER %4 ASSERT regs_used <= 15 PUSH_IF_USED 9, 10, 11, 12, 13, 14 ALLOC_STACK %4 @@ -549,12 +554,15 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... %assign num_args %1 %assign regs_used %2 + ASSERT regs_used >= num_args + %if num_args > 7 + %assign num_args 7 + %endif %if regs_used > 7 %assign regs_used 7 %endif SETUP_STACK_POINTER %4 ASSERT regs_used <= 7 - ASSERT regs_used >= num_args PUSH_IF_USED 3, 4, 5, 6 ALLOC_STACK %4 LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 @@ -616,12 +624,10 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 ; Applies any symbol mangling needed for C linkage, and sets up a define such that ; subsequent uses of the function name automatically refer to the mangled version. ; Appends cpuflags to the function name if cpuflags has been specified. 
-%macro cglobal 1-2+ ; name, [PROLOGUE args] -%if %0 == 1 - cglobal_internal %1 %+ SUFFIX -%else +%macro cglobal 1-2+ "" ; name, [PROLOGUE args] + ; the "" is a workaround for nasm, which fails if SUFFIX is empty + ; and we call cglobal_internal with just %1 %+ SUFFIX (without %2) cglobal_internal %1 %+ SUFFIX, %2 -%endif %endmacro %macro cglobal_internal 1-2+ %ifndef cglobaled_%1 @@ -642,7 +648,8 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %assign stack_offset 0 %assign stack_size 0 %assign stack_size_padded 0 - %if %0 > 1 + %assign xmm_regs_used 0 + %ifnidn %2, "" PROLOGUE %2 %endif %endmacro From 45635885e44cb7adce35ac19279d48c1ef6c4779 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 18 Dec 2012 22:57:58 +0100 Subject: [PATCH 2/2] mpegvideo: increase edge_emu_buffer size for VC1 The VC1 decoder uses edge_emu_buffer simultaneously for luma and chroma and needs more space. That was not a problem before f1d8763a02b5fce since the size for edge_emu_buffer was always calculated with 2 bytes per pixel since the linesize was not known. Fixes occasional FATE errors in vc1_sa10143. --- libavcodec/mpegvideo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 8cd8df8d1f..edd52532b2 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -241,8 +241,10 @@ int ff_mpv_frame_size_alloc(MpegEncContext *s, int linesize) // edge emu needs blocksize + filter length - 1 // (= 17x17 for halfpel / 21x21 for h264) + // VC1 computes luma and chroma simultaneously and needs 19X19 + 9x9 + // at uvlinesize. It supports only YUV420 so 24x24 is enough // linesize * interlaced * MBsize - FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer, alloc_size * 2 * 21, + FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer, alloc_size * 2 * 24, fail); FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad, alloc_size * 2 * 16 * 2,