Skip to content

Commit 168ad7f

Browse files
committed
Fix 16-bit x86_pextr encoding
The x86 ISA has (at least) two encodings for PEXTRW: (1) in the SSE2 opcode (66 0f c5), the XMM operand uses r/m and the GPR operand uses reg; (2) in the SSE4.1 opcode (66 0f 3a 15), the XMM operand uses reg and the GPR operand uses r/m. This changes the 16-bit x86_pextr encoding from (1) to (2) to match the other PEXTR* implementations (all of which use style (2)).
1 parent c932f9b commit 168ad7f

File tree

3 files changed

+11
-12
lines changed

3 files changed

+11
-12
lines changed

cranelift/codegen/meta/src/isa/x86/encodings.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1798,23 +1798,22 @@ pub(crate) fn define(
17981798
}
17991799

18001800
// SIMD extractlane
1801-
let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
1802-
HashMap::new();
1803-
x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
1804-
x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
1805-
x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
1806-
x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
1801+
let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new();
1802+
x86_pextr_mapping.insert(8, &PEXTRB);
1803+
x86_pextr_mapping.insert(16, &PEXTRW);
1804+
x86_pextr_mapping.insert(32, &PEXTR);
1805+
x86_pextr_mapping.insert(64, &PEXTR);
18071806

18081807
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
1809-
if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
1808+
if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) {
18101809
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
18111810
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
18121811
if ty.lane_bits() < 64 {
1813-
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
1812+
e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
18141813
} else {
18151814
// It turns out the 64-bit widths have REX/W encodings and only are available on
18161815
// x86_64.
1817-
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
1816+
e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
18181817
}
18191818
}
18201819
}

cranelift/codegen/meta/src/isa/x86/opcodes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
269269
/// Extract byte (SSE4.1).
270270
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
271271

272-
/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16.
273-
pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
272+
/// Extract word (SSE4.1). Unlike the 3-byte SSE2 variant, this encoding can also move to m/16.
273+
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
274274

275275
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
276276
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];

cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ function %test_extractlane_i16() {
1717
ebb0:
1818
[-, %rax] v0 = iconst.i16 4
1919
[-, %xmm1] v1 = splat.i16x8 v0
20-
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
20+
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04
2121
return
2222
}
2323

0 commit comments

Comments
 (0)