mirror of
https://git.videolan.org/git/ffmpeg.git
synced 2024-09-16 03:44:15 +02:00
x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32
Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
41d7642a7b
commit
7a15cf42ee
@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
|
|||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
%if ARCH_X86_64 == 1
|
INIT_XMM sse4
|
||||||
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
|
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
|
||||||
cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h
|
%if ARCH_X86_64
|
||||||
mov r6, srcq
|
cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2
|
||||||
mov r7, dstq
|
%else
|
||||||
mov r8, wq
|
cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2
|
||||||
|
%define hd r5mp
|
||||||
|
%endif
|
||||||
|
shl wd, 2
|
||||||
|
add srcq, wq
|
||||||
|
neg wq
|
||||||
|
mov t2q, dstq
|
||||||
|
mov t1q, wq
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
mova m3, [clip_10bit]
|
mova m3, [clip_10bit]
|
||||||
mova m4, [convert_to_unsigned_10bit]
|
mova m4, [convert_to_unsigned_10bit]
|
||||||
|
|
||||||
.loop_h:
|
.loop_h:
|
||||||
mov srcq, r6
|
mov dstq, t2q
|
||||||
mov dstq, r7
|
mov wq, t1q
|
||||||
mov wq, r8
|
|
||||||
|
|
||||||
.loop_w:
|
.loop_w:
|
||||||
movu m0, [srcq+0*mmsize]
|
movu m0, [srcq+wq+0*mmsize]
|
||||||
movu m1, [srcq+1*mmsize]
|
movu m1, [srcq+wq+1*mmsize]
|
||||||
|
|
||||||
paddd m0, m4
|
paddd m0, m4
|
||||||
paddd m1, m4
|
paddd m1, m4
|
||||||
@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
|
|||||||
|
|
||||||
movu [dstq], m0
|
movu [dstq], m0
|
||||||
|
|
||||||
add srcq, 2*mmsize
|
|
||||||
add dstq, 1*mmsize
|
add dstq, 1*mmsize
|
||||||
sub wd, 8
|
add wq, 2*mmsize
|
||||||
jg .loop_w
|
jl .loop_w
|
||||||
|
|
||||||
add r6, src_strideq
|
add srcq, src_strideq
|
||||||
add r7, dst_strideq
|
add t2q, dst_strideq
|
||||||
sub hd, 1
|
sub hd, 1
|
||||||
jg .loop_h
|
jg .loop_h
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
%endif
|
|
||||||
|
@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
|
|||||||
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||||
#if ARCH_X86_64
|
|
||||||
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
|
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
|
||||||
#endif
|
|
||||||
|
|
||||||
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||||
|
|
||||||
@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
|
|||||||
|
|
||||||
if (EXTERNAL_SSE4(mm_flags)) {
|
if (EXTERNAL_SSE4(mm_flags)) {
|
||||||
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
|
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
|
||||||
#if ARCH_X86_64
|
|
||||||
c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
|
c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user