fix: various errors in implementation of BTI

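Amend the BTI landing pad type in refmvs. Because these blocks are reached
via blr x11 (an indirect call), they need to be annotated with 'bti c'
(AARCH64_VALID_CALL_TARGET) instead of 'bti j' (AARCH64_VALID_JUMP_TARGET).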

Add missing BTI landing pads in ipred.S and ipred16.S. Because the
labels in these subroutines are reached via an indirect branch (br from a
register), they need to be annotated with 'bti j' (AARCH64_VALID_JUMP_TARGET).
This commit is contained in:
André Kempe 2023-09-08 10:02:06 +01:00
parent 97becd7372
commit 769bd1457a
3 changed files with 12 additions and 5 deletions

View File

@ -3529,6 +3529,7 @@ function ipred_z3_fill_padding_neon, export=0
br x9
2:
AARCH64_VALID_JUMP_TARGET
st1 {v31.h}[0], [x0], x1
subs w4, w4, #4
st1 {v31.h}[0], [x13], x1
@ -3547,6 +3548,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
4:
AARCH64_VALID_JUMP_TARGET
st1 {v31.s}[0], [x0], x1
subs w4, w4, #4
st1 {v31.s}[0], [x13], x1
@ -3565,6 +3567,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
8:
AARCH64_VALID_JUMP_TARGET
st1 {v31.8b}, [x0], x1
subs w4, w4, #4
st1 {v31.8b}, [x13], x1
@ -3585,6 +3588,7 @@ function ipred_z3_fill_padding_neon, export=0
16:
32:
64:
AARCH64_VALID_JUMP_TARGET
st1 {v31.16b}, [x0], x1
subs w4, w4, #4
st1 {v31.16b}, [x13], x1

View File

@ -3664,6 +3664,7 @@ function ipred_z3_fill_padding_neon, export=0
br x9
2:
AARCH64_VALID_JUMP_TARGET
st1 {v31.s}[0], [x0], x1
subs w4, w4, #4
st1 {v31.s}[0], [x13], x1
@ -3682,6 +3683,7 @@ function ipred_z3_fill_padding_neon, export=0
b 1b
4:
AARCH64_VALID_JUMP_TARGET
st1 {v31.4h}, [x0], x1
subs w4, w4, #4
st1 {v31.4h}, [x13], x1
@ -3703,6 +3705,7 @@ function ipred_z3_fill_padding_neon, export=0
16:
32:
64:
AARCH64_VALID_JUMP_TARGET
st1 {v31.8h}, [x0], x1
subs w4, w4, #4
st1 {v31.8h}, [x13], x1

View File

@ -202,27 +202,27 @@ function save_tmvs_neon, export=1
ret
10:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #4
st1 {v0.s}[0], [x3]
st1 {v0.b}[4], [x16]
add x3, x3, #5
ret
20:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #8
st1 {v0.d}[0], [x3]
st1 {v0.h}[4], [x16]
add x3, x3, #2*5
ret
40:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
st1 {v0.16b}, [x3]
str s1, [x3, #16]
add x3, x3, #4*5
ret
80:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
// This writes 6 full entries plus 2 extra bytes
st1 {v0.16b, v1.16b}, [x3]
// Write the last few, overlapping with the first write.
@ -230,7 +230,7 @@ function save_tmvs_neon, export=1
add x3, x3, #8*5
ret
160:
AARCH64_VALID_JUMP_TARGET
AARCH64_VALID_CALL_TARGET
add x16, x3, #6*5
add x17, x3, #12*5
// This writes 6 full entries plus 2 extra bytes