Skip to content

Commit 630cb3e

Browse files
committed
Add x86 encoding for SIMD imul
Only i16x8 and i32x4 are encoded in this commit mainly because i8x16 and i64x2 do not have simple encodings in x86. i64x2 is not required by the SIMD spec and there is discussion (WebAssembly/simd#98 (comment)) about removing i8x16.
1 parent 168ad7f commit 630cb3e

File tree

5 files changed

+67
-4
lines changed

5 files changed

+67
-4
lines changed

cranelift/codegen/meta/src/isa/x86/encodings.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1945,6 +1945,16 @@ pub(crate) fn define(
19451945
e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
19461946
}
19471947

1948+
// SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
1949+
// and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
1950+
for (ty, opcodes, isap) in &[
1951+
(I16, &PMULLW[..], None),
1952+
(I32, &PMULLD[..], Some(use_sse41_simd)),
1953+
] {
1954+
let imul = imul.bind_vector_from_lane(ty.clone(), sse_vector_size);
1955+
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
1956+
}
1957+
19481958
// SIMD icmp using PCMPEQ*
19491959
let mut pcmpeq_mapping: HashMap<u64, (&[u8], Option<SettingPredicateNumber>)> = HashMap::new();
19501960
pcmpeq_mapping.insert(8, (&PCMPEQB, None));

cranelift/codegen/meta/src/isa/x86/opcodes.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,14 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
281281
/// Insert word (SSE2).
282282
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
283283

284+
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
285+
/// each product in xmm1 (SSE2).
286+
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
287+
288+
/// Multiply the packed signed doubleword integers in xmm1 and xmm2/m128, and store the low 32
289+
/// bits of each product in xmm1 (SSE4.1).
290+
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
291+
284292
/// Pop top of stack into r{16,32,64}; increment stack pointer.
285293
pub static POP_REG: [u8; 1] = [0x58];
286294

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,8 +1722,7 @@ pub(crate) fn define(
17221722
Wrapping integer multiplication: `a := x y \pmod{2^B}`.
17231723
17241724
This instruction does not depend on the signed/unsigned interpretation
1725-
of the
1726-
operands.
1725+
of the operands.
17271726
17281727
Polymorphic over all integer types (vector and scalar).
17291728
"#,

cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,47 @@ ebb0:
120120

121121
return ; bin: c3
122122
}
123+
124+
function %imul_i32x4() -> b1 {
125+
ebb0:
126+
[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; -2147483647 == 0x80_00_00_01 as a 32-bit two's-complement value
127+
[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2] ; every lane is multiplied by 2
128+
[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
129+
130+
v3 = extractlane v2, 0
131+
v4 = icmp_imm eq v3, -2 ; lane 0: -1 * 2 == -2
132+
133+
v5 = extractlane v2, 1
134+
v6 = icmp_imm eq v5, 0 ; lane 1: 0 * 2 == 0
135+
136+
v7 = extractlane v2, 3
137+
v8 = icmp_imm eq v7, 2 ; lane 3: 0x80_00_00_01 * 2 == 0x1_00_00_00_02; the bit above the low 32 is dropped, leaving 2
138+
139+
v9 = band v4, v6
140+
v10 = band v8, v9
141+
return v10 ; true only if the lane 0, 1 and 3 checks all pass
142+
}
143+
; run
144+
145+
function %imul_i16x8() -> b1 {
146+
ebb0:
147+
[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
148+
[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
149+
[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca
150+
151+
v3 = extractlane v2, 0
152+
v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
153+
; uextend-ed, not sextend-ed
154+
155+
v5 = extractlane v2, 1
156+
v6 = icmp_imm eq v5, 0
157+
158+
v7 = extractlane v2, 3
159+
v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
160+
161+
v9 = band v4, v6
162+
v10 = band v8, v9
163+
164+
return v10 ; true only if the lane 0, 1 and 3 checks all pass (previously only the lane 0 check, v4, was returned)
165+
}
166+
; run

cranelift/wasm/src/code_translator.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
10081008
let a = state.pop1();
10091009
state.push1(builder.ins().ineg(a))
10101010
}
1011+
Operator::I16x8Mul | Operator::I32x4Mul => {
1012+
let (a, b) = state.pop2();
1013+
state.push1(builder.ins().imul(a, b))
1014+
}
10111015
Operator::I8x16Eq
10121016
| Operator::I8x16Ne
10131017
| Operator::I8x16LtS
@@ -1074,13 +1078,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
10741078
| Operator::I16x8AddSaturateU
10751079
| Operator::I16x8SubSaturateS
10761080
| Operator::I16x8SubSaturateU
1077-
| Operator::I16x8Mul
10781081
| Operator::I32x4AnyTrue
10791082
| Operator::I32x4AllTrue
10801083
| Operator::I32x4Shl
10811084
| Operator::I32x4ShrS
10821085
| Operator::I32x4ShrU
1083-
| Operator::I32x4Mul
10841086
| Operator::I64x2AnyTrue
10851087
| Operator::I64x2AllTrue
10861088
| Operator::I64x2Shl

0 commit comments

Comments
 (0)