mirror of https://git.videolan.org/git/ffmpeg.git
x86/swr: convert resample_{common, linear}_double_sse2 to yasm
Signed-off-by: James Almer <jamrial@gmail.com>
312531 -> 311528 decicycles
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
fb318def5d
commit
dd2c9034b1
|
@ -25,23 +25,15 @@
|
||||||
* @author Michael Niedermayer <michaelni@gmx.at>
|
* @author Michael Niedermayer <michaelni@gmx.at>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(TEMPLATE_RESAMPLE_DBL) \
|
#if defined(TEMPLATE_RESAMPLE_DBL)
|
||||||
|| defined(TEMPLATE_RESAMPLE_DBL_SSE2)
|
|
||||||
|
|
||||||
|
# define RENAME(N) N ## _double
|
||||||
# define FILTER_SHIFT 0
|
# define FILTER_SHIFT 0
|
||||||
# define DELEM double
|
# define DELEM double
|
||||||
# define FELEM double
|
# define FELEM double
|
||||||
# define FELEM2 double
|
# define FELEM2 double
|
||||||
# define OUT(d, v) d = v
|
# define OUT(d, v) d = v
|
||||||
|
|
||||||
# if defined(TEMPLATE_RESAMPLE_DBL)
|
|
||||||
# define RENAME(N) N ## _double
|
|
||||||
# elif defined(TEMPLATE_RESAMPLE_DBL_SSE2)
|
|
||||||
# define COMMON_CORE COMMON_CORE_DBL_SSE2
|
|
||||||
# define LINEAR_CORE LINEAR_CORE_DBL_SSE2
|
|
||||||
# define RENAME(N) N ## _double_sse2
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#elif defined(TEMPLATE_RESAMPLE_FLT)
|
#elif defined(TEMPLATE_RESAMPLE_FLT)
|
||||||
|
|
||||||
# define RENAME(N) N ## _float
|
# define RENAME(N) N ## _float
|
||||||
|
@ -104,16 +96,12 @@ int RENAME(swri_resample_common)(ResampleContext *c,
|
||||||
for (dst_index = 0; dst_index < n; dst_index++) {
|
for (dst_index = 0; dst_index < n; dst_index++) {
|
||||||
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
||||||
|
|
||||||
#ifdef COMMON_CORE
|
|
||||||
COMMON_CORE
|
|
||||||
#else
|
|
||||||
FELEM2 val=0;
|
FELEM2 val=0;
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < c->filter_length; i++) {
|
for (i = 0; i < c->filter_length; i++) {
|
||||||
val += src[sample_index + i] * (FELEM2)filter[i];
|
val += src[sample_index + i] * (FELEM2)filter[i];
|
||||||
}
|
}
|
||||||
OUT(dst[dst_index], val);
|
OUT(dst[dst_index], val);
|
||||||
#endif
|
|
||||||
|
|
||||||
frac += c->dst_incr_mod;
|
frac += c->dst_incr_mod;
|
||||||
index += c->dst_incr_div;
|
index += c->dst_incr_div;
|
||||||
|
@ -150,15 +138,11 @@ int RENAME(swri_resample_linear)(ResampleContext *c,
|
||||||
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
||||||
FELEM2 val=0, v2 = 0;
|
FELEM2 val=0, v2 = 0;
|
||||||
|
|
||||||
#ifdef LINEAR_CORE
|
|
||||||
LINEAR_CORE
|
|
||||||
#else
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < c->filter_length; i++) {
|
for (i = 0; i < c->filter_length; i++) {
|
||||||
val += src[sample_index + i] * (FELEM2)filter[i];
|
val += src[sample_index + i] * (FELEM2)filter[i];
|
||||||
v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
|
v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#ifdef FELEML
|
#ifdef FELEML
|
||||||
val += (v2 - val) * (FELEML) frac / c->src_incr;
|
val += (v2 - val) * (FELEML) frac / c->src_incr;
|
||||||
#else
|
#else
|
||||||
|
@ -188,8 +172,6 @@ int RENAME(swri_resample_linear)(ResampleContext *c,
|
||||||
return sample_index;
|
return sample_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef COMMON_CORE
|
|
||||||
#undef LINEAR_CORE
|
|
||||||
#undef RENAME
|
#undef RENAME
|
||||||
#undef FILTER_SHIFT
|
#undef FILTER_SHIFT
|
||||||
#undef DELEM
|
#undef DELEM
|
||||||
|
|
|
@ -50,11 +50,12 @@ endstruc
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
|
|
||||||
pf_1: dd 1.0
|
pf_1: dd 1.0
|
||||||
|
pdbl_1: dq 1.0
|
||||||
pd_0x4000: dd 0x4000
|
pd_0x4000: dd 0x4000
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%macro RESAMPLE_FNS 3 ; format [float or int16], bps, log2_bps
|
%macro RESAMPLE_FNS 3-5 ; format [float, double or int16], bps, log2_bps, float op suffix [s or d], 1.0 constant
|
||||||
; int resample_common_$format(ResampleContext *ctx, $format *dst,
|
; int resample_common_$format(ResampleContext *ctx, $format *dst,
|
||||||
; const $format *src, int size, int update_ctx)
|
; const $format *src, int size, int update_ctx)
|
||||||
%if ARCH_X86_64 ; unix64 and win64
|
%if ARCH_X86_64 ; unix64 and win64
|
||||||
|
@ -165,21 +166,21 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||||
lea filterq, [min_filter_count_x4q+filterq*%2]
|
lea filterq, [min_filter_count_x4q+filterq*%2]
|
||||||
mov min_filter_count_x4q, min_filter_length_x4q
|
mov min_filter_count_x4q, min_filter_length_x4q
|
||||||
%endif
|
%endif
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
xorps m0, m0, m0
|
|
||||||
%else ; int16
|
|
||||||
movd m0, [pd_0x4000]
|
movd m0, [pd_0x4000]
|
||||||
|
%else ; float/double
|
||||||
|
xorps m0, m0, m0
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
.inner_loop:
|
.inner_loop:
|
||||||
movu m1, [srcq+min_filter_count_x4q*1]
|
movu m1, [srcq+min_filter_count_x4q*1]
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
mulps m1, m1, [filterq+min_filter_count_x4q*1]
|
|
||||||
addps m0, m0, m1
|
|
||||||
%else ; int16
|
|
||||||
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
|
%else ; float/double
|
||||||
|
mulp%4 m1, m1, [filterq+min_filter_count_x4q*1]
|
||||||
|
addp%4 m0, m0, m1
|
||||||
%endif
|
%endif
|
||||||
add min_filter_count_x4q, mmsize
|
add min_filter_count_x4q, mmsize
|
||||||
js .inner_loop
|
js .inner_loop
|
||||||
|
@ -189,16 +190,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||||
addps xm0, xm1
|
addps xm0, xm1
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
; horizontal sum & store
|
%ifidn %1, int16
|
||||||
%ifidn %1, float
|
|
||||||
movhlps xm1, xm0
|
|
||||||
addps xm0, xm1
|
|
||||||
shufps xm1, xm0, xm0, q0001
|
|
||||||
add fracd, dst_incr_modd
|
|
||||||
addps xm0, xm1
|
|
||||||
add indexd, dst_incr_divd
|
|
||||||
movss [dstq], xm0
|
|
||||||
%else ; int16
|
|
||||||
%if mmsize == 16
|
%if mmsize == 16
|
||||||
pshufd m1, m0, q0032
|
pshufd m1, m0, q0032
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
|
@ -212,6 +204,17 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||||
packssdw m0, m0
|
packssdw m0, m0
|
||||||
add indexd, dst_incr_divd
|
add indexd, dst_incr_divd
|
||||||
movd [dstq], m0
|
movd [dstq], m0
|
||||||
|
%else ; float/double
|
||||||
|
; horizontal sum & store
|
||||||
|
movhlps xm1, xm0
|
||||||
|
%ifidn %1, float
|
||||||
|
addps xm0, xm1
|
||||||
|
shufps xm1, xm0, xm0, q0001
|
||||||
|
%endif
|
||||||
|
add fracd, dst_incr_modd
|
||||||
|
addp%4 xm0, xm1
|
||||||
|
add indexd, dst_incr_divd
|
||||||
|
movs%4 [dstq], xm0
|
||||||
%endif
|
%endif
|
||||||
cmp fracd, src_incrd
|
cmp fracd, src_incrd
|
||||||
jl .skip
|
jl .skip
|
||||||
|
@ -307,12 +310,12 @@ cglobal resample_linear_%1, 0, 15, 5, ctx, phase_mask, src, phase_shift, index,
|
||||||
mov ctx_stackq, ctxq
|
mov ctx_stackq, ctxq
|
||||||
mov phase_mask_stackd, phase_maskd
|
mov phase_mask_stackd, phase_maskd
|
||||||
mov min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
|
mov min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
cvtsi2ss xm0, src_incrd
|
|
||||||
movss xm4, [pf_1]
|
|
||||||
divss xm4, xm0
|
|
||||||
%else ; int16
|
|
||||||
movd m4, [pd_0x4000]
|
movd m4, [pd_0x4000]
|
||||||
|
%else ; float/double
|
||||||
|
cvtsi2s%4 xm0, src_incrd
|
||||||
|
movs%4 xm4, [%5]
|
||||||
|
divs%4 xm4, xm0
|
||||||
%endif
|
%endif
|
||||||
mov dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
|
mov dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
|
||||||
shl min_filter_len_x4d, %3
|
shl min_filter_len_x4d, %3
|
||||||
|
@ -360,12 +363,12 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||||
mov r3, dword [ctxq+ResampleContext.src_incr]
|
mov r3, dword [ctxq+ResampleContext.src_incr]
|
||||||
PUSH dword [ctxq+ResampleContext.phase_mask]
|
PUSH dword [ctxq+ResampleContext.phase_mask]
|
||||||
PUSH r3d
|
PUSH r3d
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
cvtsi2ss xm0, r3d
|
|
||||||
movss xm4, [pf_1]
|
|
||||||
divss xm4, xm0
|
|
||||||
%else ; int16
|
|
||||||
movd m4, [pd_0x4000]
|
movd m4, [pd_0x4000]
|
||||||
|
%else ; float/double
|
||||||
|
cvtsi2s%4 xm0, r3d
|
||||||
|
movs%4 xm4, [%5]
|
||||||
|
divs%4 xm4, xm0
|
||||||
%endif
|
%endif
|
||||||
mov min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
|
mov min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
|
||||||
mov indexd, [ctxq+ResampleContext.index]
|
mov indexd, [ctxq+ResampleContext.index]
|
||||||
|
@ -409,27 +412,27 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||||
mov filter2q, filter1q
|
mov filter2q, filter1q
|
||||||
add filter2q, filter_alloc_x4q
|
add filter2q, filter_alloc_x4q
|
||||||
%endif
|
%endif
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
xorps m0, m0, m0
|
|
||||||
xorps m2, m2, m2
|
|
||||||
%else ; int16
|
|
||||||
mova m0, m4
|
mova m0, m4
|
||||||
mova m2, m4
|
mova m2, m4
|
||||||
|
%else ; float/double
|
||||||
|
xorps m0, m0, m0
|
||||||
|
xorps m2, m2, m2
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
.inner_loop:
|
.inner_loop:
|
||||||
movu m1, [srcq+min_filter_count_x4q*1]
|
movu m1, [srcq+min_filter_count_x4q*1]
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
mulps m3, m1, [filter2q+min_filter_count_x4q*1]
|
|
||||||
mulps m1, m1, [filter1q+min_filter_count_x4q*1]
|
|
||||||
addps m2, m2, m3
|
|
||||||
addps m0, m0, m1
|
|
||||||
%else ; int16
|
|
||||||
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
|
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
|
||||||
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
|
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
|
||||||
paddd m2, m3
|
paddd m2, m3
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
|
%else ; float/double
|
||||||
|
mulp%4 m3, m1, [filter2q+min_filter_count_x4q*1]
|
||||||
|
mulp%4 m1, m1, [filter1q+min_filter_count_x4q*1]
|
||||||
|
addp%4 m2, m2, m3
|
||||||
|
addp%4 m0, m0, m1
|
||||||
%endif
|
%endif
|
||||||
add min_filter_count_x4q, mmsize
|
add min_filter_count_x4q, mmsize
|
||||||
js .inner_loop
|
js .inner_loop
|
||||||
|
@ -441,24 +444,7 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||||
addps xm2, xm3
|
addps xm2, xm3
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifidn %1, float
|
%ifidn %1, int16
|
||||||
; val += (v2 - val) * (FELEML) frac / c->src_incr;
|
|
||||||
cvtsi2ss xm1, fracd
|
|
||||||
subps xm2, xm0
|
|
||||||
mulps xm1, xm4
|
|
||||||
shufps xm1, xm1, q0000
|
|
||||||
mulps xm2, xm1
|
|
||||||
addps xm0, xm2
|
|
||||||
|
|
||||||
; horizontal sum & store
|
|
||||||
movhlps xm1, xm0
|
|
||||||
addps xm0, xm1
|
|
||||||
shufps xm1, xm0, xm0, q0001
|
|
||||||
add fracd, dst_incr_modd
|
|
||||||
addps xm0, xm1
|
|
||||||
add indexd, dst_incr_divd
|
|
||||||
movss [dstq], xm0
|
|
||||||
%else ; int16
|
|
||||||
%if mmsize == 16
|
%if mmsize == 16
|
||||||
pshufd m3, m2, q0032
|
pshufd m3, m2, q0032
|
||||||
pshufd m1, m0, q0032
|
pshufd m1, m0, q0032
|
||||||
|
@ -491,6 +477,25 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||||
; - 32bit: eax=r0[filter1], edx=r2[filter2]
|
; - 32bit: eax=r0[filter1], edx=r2[filter2]
|
||||||
; - win64: eax=r6[filter1], edx=r1[todo]
|
; - win64: eax=r6[filter1], edx=r1[todo]
|
||||||
; - unix64: eax=r6[filter1], edx=r2[todo]
|
; - unix64: eax=r6[filter1], edx=r2[todo]
|
||||||
|
%else ; float/double
|
||||||
|
; val += (v2 - val) * (FELEML) frac / c->src_incr;
|
||||||
|
cvtsi2s%4 xm1, fracd
|
||||||
|
subp%4 xm2, xm0
|
||||||
|
mulp%4 xm1, xm4
|
||||||
|
shufp%4 xm1, xm1, q0000
|
||||||
|
mulp%4 xm2, xm1
|
||||||
|
addp%4 xm0, xm2
|
||||||
|
|
||||||
|
; horizontal sum & store
|
||||||
|
movhlps xm1, xm0
|
||||||
|
%ifidn %1, float
|
||||||
|
addps xm0, xm1
|
||||||
|
shufps xm1, xm0, xm0, q0001
|
||||||
|
%endif
|
||||||
|
add fracd, dst_incr_modd
|
||||||
|
addp%4 xm0, xm1
|
||||||
|
add indexd, dst_incr_divd
|
||||||
|
movs%4 [dstq], xm0
|
||||||
%endif
|
%endif
|
||||||
cmp fracd, src_incrd
|
cmp fracd, src_incrd
|
||||||
jl .skip
|
jl .skip
|
||||||
|
@ -553,11 +558,11 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM sse
|
INIT_XMM sse
|
||||||
RESAMPLE_FNS float, 4, 2
|
RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||||
|
|
||||||
%if HAVE_AVX_EXTERNAL
|
%if HAVE_AVX_EXTERNAL
|
||||||
INIT_YMM avx
|
INIT_YMM avx
|
||||||
RESAMPLE_FNS float, 4, 2
|
RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
|
@ -567,3 +572,4 @@ RESAMPLE_FNS int16, 2, 1
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
RESAMPLE_FNS int16, 2, 1
|
RESAMPLE_FNS int16, 2, 1
|
||||||
|
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||||
|
|
|
@ -1,72 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "libavutil/x86/asm.h"
|
|
||||||
#include "libavutil/cpu.h"
|
|
||||||
#include "libswresample/swresample_internal.h"
|
|
||||||
|
|
||||||
#define COMMON_CORE_DBL_SSE2 \
|
|
||||||
x86_reg len= -8*c->filter_length;\
|
|
||||||
__asm__ volatile(\
|
|
||||||
"xorpd %%xmm0, %%xmm0 \n\t"\
|
|
||||||
"1: \n\t"\
|
|
||||||
"movupd (%1, %0), %%xmm1 \n\t"\
|
|
||||||
"mulpd (%2, %0), %%xmm1 \n\t"\
|
|
||||||
"addpd %%xmm1, %%xmm0 \n\t"\
|
|
||||||
"add $16, %0 \n\t"\
|
|
||||||
" js 1b \n\t"\
|
|
||||||
"movhlps %%xmm0, %%xmm1 \n\t"\
|
|
||||||
"addpd %%xmm1, %%xmm0 \n\t"\
|
|
||||||
"movsd %%xmm0, (%3) \n\t"\
|
|
||||||
: "+r" (len)\
|
|
||||||
: "r" (((uint8_t*)(src+sample_index))-len),\
|
|
||||||
"r" (((uint8_t*)filter)-len),\
|
|
||||||
"r" (dst+dst_index)\
|
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1")\
|
|
||||||
);
|
|
||||||
|
|
||||||
#define LINEAR_CORE_DBL_SSE2 \
|
|
||||||
x86_reg len= -8*c->filter_length;\
|
|
||||||
__asm__ volatile(\
|
|
||||||
"xorpd %%xmm0, %%xmm0 \n\t"\
|
|
||||||
"xorpd %%xmm2, %%xmm2 \n\t"\
|
|
||||||
"1: \n\t"\
|
|
||||||
"movupd (%3, %0), %%xmm1 \n\t"\
|
|
||||||
"movapd %%xmm1, %%xmm3 \n\t"\
|
|
||||||
"mulpd (%4, %0), %%xmm1 \n\t"\
|
|
||||||
"mulpd (%5, %0), %%xmm3 \n\t"\
|
|
||||||
"addpd %%xmm1, %%xmm0 \n\t"\
|
|
||||||
"addpd %%xmm3, %%xmm2 \n\t"\
|
|
||||||
"add $16, %0 \n\t"\
|
|
||||||
" js 1b \n\t"\
|
|
||||||
"movhlps %%xmm0, %%xmm1 \n\t"\
|
|
||||||
"movhlps %%xmm2, %%xmm3 \n\t"\
|
|
||||||
"addpd %%xmm1, %%xmm0 \n\t"\
|
|
||||||
"addpd %%xmm3, %%xmm2 \n\t"\
|
|
||||||
"movsd %%xmm0, %1 \n\t"\
|
|
||||||
"movsd %%xmm2, %2 \n\t"\
|
|
||||||
: "+r" (len),\
|
|
||||||
"=m" (val),\
|
|
||||||
"=m" (v2)\
|
|
||||||
: "r" (((uint8_t*)(src+sample_index))-len),\
|
|
||||||
"r" (((uint8_t*)filter)-len),\
|
|
||||||
"r" (((uint8_t*)(filter+c->filter_alloc))-len)\
|
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")\
|
|
||||||
);
|
|
|
@ -27,21 +27,6 @@
|
||||||
|
|
||||||
#include "libswresample/resample.h"
|
#include "libswresample/resample.h"
|
||||||
|
|
||||||
int swri_resample_common_double_sse2(ResampleContext *c, double *dst, const double *src, int n, int update_ctx);
|
|
||||||
int swri_resample_linear_double_sse2(ResampleContext *c, double *dst, const double *src, int n, int update_ctx);
|
|
||||||
|
|
||||||
#if HAVE_SSE2_INLINE
|
|
||||||
#define DO_RESAMPLE_ONE 0
|
|
||||||
|
|
||||||
#include "resample_mmx.h"
|
|
||||||
|
|
||||||
#define TEMPLATE_RESAMPLE_DBL_SSE2
|
|
||||||
#include "libswresample/resample_template.c"
|
|
||||||
#undef TEMPLATE_RESAMPLE_DBL_SSE2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef DO_RESAMPLE_ONE
|
|
||||||
|
|
||||||
int ff_resample_common_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
int ff_resample_common_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
||||||
const uint8_t *src, int sz, int upd);
|
const uint8_t *src, int sz, int upd);
|
||||||
int ff_resample_linear_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
int ff_resample_linear_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
||||||
|
@ -62,6 +47,11 @@ int ff_resample_common_float_avx(ResampleContext *c, uint8_t *dst,
|
||||||
int ff_resample_linear_float_avx(ResampleContext *c, uint8_t *dst,
|
int ff_resample_linear_float_avx(ResampleContext *c, uint8_t *dst,
|
||||||
const uint8_t *src, int sz, int upd);
|
const uint8_t *src, int sz, int upd);
|
||||||
|
|
||||||
|
int ff_resample_common_double_sse2(ResampleContext *c, uint8_t *dst,
|
||||||
|
const uint8_t *src, int sz, int upd);
|
||||||
|
int ff_resample_linear_double_sse2(ResampleContext *c, uint8_t *dst,
|
||||||
|
const uint8_t *src, int sz, int upd);
|
||||||
|
|
||||||
void swresample_dsp_x86_init(ResampleContext *c)
|
void swresample_dsp_x86_init(ResampleContext *c)
|
||||||
{
|
{
|
||||||
int av_unused mm_flags = av_get_cpu_flags();
|
int av_unused mm_flags = av_get_cpu_flags();
|
||||||
|
@ -78,10 +68,9 @@ void swresample_dsp_x86_init(ResampleContext *c)
|
||||||
if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) {
|
if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) {
|
||||||
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_sse2;
|
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_sse2;
|
||||||
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_sse2;
|
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_sse2;
|
||||||
}
|
|
||||||
if (HAVE_SSE2_INLINE && mm_flags & AV_CPU_FLAG_SSE2) {
|
c->dsp.resample_common[FNIDX(DBLP)] = ff_resample_common_double_sse2;
|
||||||
c->dsp.resample_common[FNIDX(DBLP)] = (resample_fn) swri_resample_common_double_sse2;
|
c->dsp.resample_linear[FNIDX(DBLP)] = ff_resample_linear_double_sse2;
|
||||||
c->dsp.resample_linear[FNIDX(DBLP)] = (resample_fn) swri_resample_linear_double_sse2;
|
|
||||||
}
|
}
|
||||||
if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
|
if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
|
||||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_avx;
|
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_avx;
|
||||||
|
|
Loading…
Reference in New Issue