mirror of https://code.videolan.org/videolan/dav1d
fix: various errors in implementation of BTI
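Amend the BTI landing pad type in refmvs: because these blocks are reached via `blr x11` (an indirect call), they must be annotated with 'bti c' (AARCH64_VALID_CALL_TARGET) rather than 'bti j' (AARCH64_VALID_JUMP_TARGET). Add the missing BTI landing pads in ipred.S and ipred16.S: because these code blocks are reached via a `br` through a register (an indirect jump), they need to be annotated with 'bti j' (AARCH64_VALID_JUMP_TARGET).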
This commit is contained in:
parent
97becd7372
commit
769bd1457a
|
@ -3529,6 +3529,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
br x9
|
||||
|
||||
2:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.h}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.h}[0], [x13], x1
|
||||
|
@ -3547,6 +3548,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
4:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.s}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.s}[0], [x13], x1
|
||||
|
@ -3565,6 +3567,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
8:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.8b}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.8b}, [x13], x1
|
||||
|
@ -3585,6 +3588,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
16:
|
||||
32:
|
||||
64:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.16b}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.16b}, [x13], x1
|
||||
|
|
|
@ -3664,6 +3664,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
br x9
|
||||
|
||||
2:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.s}[0], [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.s}[0], [x13], x1
|
||||
|
@ -3682,6 +3683,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
b 1b
|
||||
|
||||
4:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.4h}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.4h}, [x13], x1
|
||||
|
@ -3703,6 +3705,7 @@ function ipred_z3_fill_padding_neon, export=0
|
|||
16:
|
||||
32:
|
||||
64:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
st1 {v31.8h}, [x0], x1
|
||||
subs w4, w4, #4
|
||||
st1 {v31.8h}, [x13], x1
|
||||
|
|
|
@ -202,27 +202,27 @@ function save_tmvs_neon, export=1
|
|||
ret
|
||||
|
||||
10:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #4
|
||||
st1 {v0.s}[0], [x3]
|
||||
st1 {v0.b}[4], [x16]
|
||||
add x3, x3, #5
|
||||
ret
|
||||
20:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #8
|
||||
st1 {v0.d}[0], [x3]
|
||||
st1 {v0.h}[4], [x16]
|
||||
add x3, x3, #2*5
|
||||
ret
|
||||
40:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
st1 {v0.16b}, [x3]
|
||||
str s1, [x3, #16]
|
||||
add x3, x3, #4*5
|
||||
ret
|
||||
80:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This writes 6 full entries plus 2 extra bytes
|
||||
st1 {v0.16b, v1.16b}, [x3]
|
||||
// Write the last few, overlapping with the first write.
|
||||
|
@ -230,7 +230,7 @@ function save_tmvs_neon, export=1
|
|||
add x3, x3, #8*5
|
||||
ret
|
||||
160:
|
||||
AARCH64_VALID_JUMP_TARGET
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
add x16, x3, #6*5
|
||||
add x17, x3, #12*5
|
||||
// This writes 6 full entries plus 2 extra bytes
|
||||
|
|
Loading…
Reference in New Issue