diff --git a/libavfilter/x86/vf_transpose.asm b/libavfilter/x86/vf_transpose.asm index f9f585369a..c532c899ee 100644 --- a/libavfilter/x86/vf_transpose.asm +++ b/libavfilter/x86/vf_transpose.asm @@ -56,10 +56,7 @@ cglobal transpose_8x8_8, 4,5,8, src, src_linesize, dst, dst_linesize, linesize3 movq [dstq + linesize3q], m7 RET -%if ARCH_X86_64 - -INIT_XMM sse2 -cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3 +cglobal transpose_8x8_16, 4,5,9, ARCH_X86_32 * 32, src, src_linesize, dst, dst_linesize, linesize3 lea linesize3q, [src_linesizeq * 3] movu m0, [srcq + src_linesizeq * 0] movu m1, [srcq + src_linesizeq * 1] @@ -71,7 +68,11 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3 movu m6, [srcq + src_linesizeq * 2] movu m7, [srcq + linesize3q] +%if ARCH_X86_64 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp + 16] +%endif lea linesize3q, [dst_linesizeq * 3] movu [dstq + dst_linesizeq * 0], m0 @@ -84,5 +85,3 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3 movu [dstq + dst_linesizeq * 2], m6 movu [dstq + linesize3q], m7 RET - -%endif diff --git a/libavfilter/x86/vf_transpose_init.c b/libavfilter/x86/vf_transpose_init.c index f1a9cd058b..6bb9908725 100644 --- a/libavfilter/x86/vf_transpose_init.c +++ b/libavfilter/x86/vf_transpose_init.c @@ -43,7 +43,7 @@ av_cold void ff_transpose_init_x86(TransVtable *v, int pixstep) v->transpose_8x8 = ff_transpose_8x8_8_sse2; } - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && pixstep == 2) { + if (EXTERNAL_SSE2(cpu_flags) && pixstep == 2) { v->transpose_8x8 = ff_transpose_8x8_16_sse2; } }