1
mirror of https://git.videolan.org/git/ffmpeg.git synced 2024-08-10 11:25:05 +02:00

ARM: convert VFP code to UAL syntax

Originally committed as revision 15994 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Måns Rullgård 2008-12-03 20:16:01 +00:00
parent 289e8fd001
commit b0e8ce55ae

View File

@@ -21,6 +21,7 @@
#include "config.h" #include "config.h"
#include "asm.S" #include "asm.S"
.fpu neon @ required for gas to accept UAL syntax
/* /*
* VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
* throughput for almost all the instructions (except for double precision * throughput for almost all the instructions (except for double precision
@@ -48,29 +49,29 @@ function ff_vector_fmul_vfp, export=1
orr r12, r12, #(3 << 16) /* set vector size to 4 */ orr r12, r12, #(3 << 16) /* set vector size to 4 */
fmxr fpscr, r12 fmxr fpscr, r12
fldmias r3!, {s0-s3} vldmia r3!, {s0-s3}
fldmias r1!, {s8-s11} vldmia r1!, {s8-s11}
fldmias r3!, {s4-s7} vldmia r3!, {s4-s7}
fldmias r1!, {s12-s15} vldmia r1!, {s12-s15}
fmuls s8, s0, s8 vmul.f32 s8, s0, s8
1: 1:
subs r2, r2, #16 subs r2, r2, #16
fmuls s12, s4, s12 vmul.f32 s12, s4, s12
fldmiasge r3!, {s16-s19} vldmiage r3!, {s16-s19}
fldmiasge r1!, {s24-s27} vldmiage r1!, {s24-s27}
fldmiasge r3!, {s20-s23} vldmiage r3!, {s20-s23}
fldmiasge r1!, {s28-s31} vldmiage r1!, {s28-s31}
fmulsge s24, s16, s24 vmulge.f32 s24, s16, s24
fstmias r0!, {s8-s11} vstmia r0!, {s8-s11}
fstmias r0!, {s12-s15} vstmia r0!, {s12-s15}
fmulsge s28, s20, s28 vmulge.f32 s28, s20, s28
fldmiasgt r3!, {s0-s3} vldmiagt r3!, {s0-s3}
fldmiasgt r1!, {s8-s11} vldmiagt r1!, {s8-s11}
fldmiasgt r3!, {s4-s7} vldmiagt r3!, {s4-s7}
fldmiasgt r1!, {s12-s15} vldmiagt r1!, {s12-s15}
fmulsge s8, s0, s8 vmulge.f32 s8, s0, s8
fstmiasge r0!, {s24-s27} vstmiage r0!, {s24-s27}
fstmiasge r0!, {s28-s31} vstmiage r0!, {s28-s31}
bgt 1b bgt 1b
bic r12, r12, #(7 << 16) /* set vector size back to 1 */ bic r12, r12, #(7 << 16) /* set vector size back to 1 */
@@ -88,44 +89,44 @@ function ff_vector_fmul_vfp, export=1
function ff_vector_fmul_reverse_vfp, export=1 function ff_vector_fmul_reverse_vfp, export=1
vpush {d8-d15} vpush {d8-d15}
add r2, r2, r3, lsl #2 add r2, r2, r3, lsl #2
fldmdbs r2!, {s0-s3} vldmdb r2!, {s0-s3}
fldmias r1!, {s8-s11} vldmia r1!, {s8-s11}
fldmdbs r2!, {s4-s7} vldmdb r2!, {s4-s7}
fldmias r1!, {s12-s15} vldmia r1!, {s12-s15}
fmuls s8, s3, s8 vmul.f32 s8, s3, s8
fmuls s9, s2, s9 vmul.f32 s9, s2, s9
fmuls s10, s1, s10 vmul.f32 s10, s1, s10
fmuls s11, s0, s11 vmul.f32 s11, s0, s11
1: 1:
subs r3, r3, #16 subs r3, r3, #16
fldmdbsge r2!, {s16-s19} vldmdbge r2!, {s16-s19}
fmuls s12, s7, s12 vmul.f32 s12, s7, s12
fldmiasge r1!, {s24-s27} vldmiage r1!, {s24-s27}
fmuls s13, s6, s13 vmul.f32 s13, s6, s13
fldmdbsge r2!, {s20-s23} vldmdbge r2!, {s20-s23}
fmuls s14, s5, s14 vmul.f32 s14, s5, s14
fldmiasge r1!, {s28-s31} vldmiage r1!, {s28-s31}
fmuls s15, s4, s15 vmul.f32 s15, s4, s15
fmulsge s24, s19, s24 vmulge.f32 s24, s19, s24
fldmdbsgt r2!, {s0-s3} vldmdbgt r2!, {s0-s3}
fmulsge s25, s18, s25 vmulge.f32 s25, s18, s25
fstmias r0!, {s8-s13} vstmia r0!, {s8-s13}
fmulsge s26, s17, s26 vmulge.f32 s26, s17, s26
fldmiasgt r1!, {s8-s11} vldmiagt r1!, {s8-s11}
fmulsge s27, s16, s27 vmulge.f32 s27, s16, s27
fmulsge s28, s23, s28 vmulge.f32 s28, s23, s28
fldmdbsgt r2!, {s4-s7} vldmdbgt r2!, {s4-s7}
fmulsge s29, s22, s29 vmulge.f32 s29, s22, s29
fstmias r0!, {s14-s15} vstmia r0!, {s14-s15}
fmulsge s30, s21, s30 vmulge.f32 s30, s21, s30
fmulsge s31, s20, s31 vmulge.f32 s31, s20, s31
fmulsge s8, s3, s8 vmulge.f32 s8, s3, s8
fldmiasgt r1!, {s12-s15} vldmiagt r1!, {s12-s15}
fmulsge s9, s2, s9 vmulge.f32 s9, s2, s9
fmulsge s10, s1, s10 vmulge.f32 s10, s1, s10
fstmiasge r0!, {s24-s27} vstmiage r0!, {s24-s27}
fmulsge s11, s0, s11 vmulge.f32 s11, s0, s11
fstmiasge r0!, {s28-s31} vstmiage r0!, {s28-s31}
bgt 1b bgt 1b
vpop {d8-d15} vpop {d8-d15}
@@ -143,36 +144,36 @@ function ff_vector_fmul_reverse_vfp, export=1
function ff_float_to_int16_vfp, export=1 function ff_float_to_int16_vfp, export=1
push {r4-r8,lr} push {r4-r8,lr}
vpush {d8-d11} vpush {d8-d11}
fldmias r1!, {s16-s23} vldmia r1!, {s16-s23}
ftosis s0, s16 vcvt.s32.f32 s0, s16
ftosis s1, s17 vcvt.s32.f32 s1, s17
ftosis s2, s18 vcvt.s32.f32 s2, s18
ftosis s3, s19 vcvt.s32.f32 s3, s19
ftosis s4, s20 vcvt.s32.f32 s4, s20
ftosis s5, s21 vcvt.s32.f32 s5, s21
ftosis s6, s22 vcvt.s32.f32 s6, s22
ftosis s7, s23 vcvt.s32.f32 s7, s23
1: 1:
subs r2, r2, #8 subs r2, r2, #8
fmrrs r3, r4, {s0, s1} vmov r3, r4, s0, s1
fmrrs r5, r6, {s2, s3} vmov r5, r6, s2, s3
fmrrs r7, r8, {s4, s5} vmov r7, r8, s4, s5
fmrrs ip, lr, {s6, s7} vmov ip, lr, s6, s7
fldmiasgt r1!, {s16-s23} vldmiagt r1!, {s16-s23}
ssat r4, #16, r4 ssat r4, #16, r4
ssat r3, #16, r3 ssat r3, #16, r3
ssat r6, #16, r6 ssat r6, #16, r6
ssat r5, #16, r5 ssat r5, #16, r5
pkhbt r3, r3, r4, lsl #16 pkhbt r3, r3, r4, lsl #16
pkhbt r4, r5, r6, lsl #16 pkhbt r4, r5, r6, lsl #16
ftosisgt s0, s16 vcvtgt.s32.f32 s0, s16
ftosisgt s1, s17 vcvtgt.s32.f32 s1, s17
ftosisgt s2, s18 vcvtgt.s32.f32 s2, s18
ftosisgt s3, s19 vcvtgt.s32.f32 s3, s19
ftosisgt s4, s20 vcvtgt.s32.f32 s4, s20
ftosisgt s5, s21 vcvtgt.s32.f32 s5, s21
ftosisgt s6, s22 vcvtgt.s32.f32 s6, s22
ftosisgt s7, s23 vcvtgt.s32.f32 s7, s23
ssat r8, #16, r8 ssat r8, #16, r8
ssat r7, #16, r7 ssat r7, #16, r7
ssat lr, #16, lr ssat lr, #16, lr