Skip to content

Commit a80f861

Browse files
committed
Implement relaxed SIMD dot product instructions
As proposed in WebAssembly/relaxed-simd#52.
1 parent 094deb0 commit a80f861

13 files changed

+235
-13
lines changed

scripts/gen-s-parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,11 @@
533533
("f32x4.relaxed_max", "makeBinary(s, BinaryOp::RelaxedMaxVecF32x4)"),
534534
("f64x2.relaxed_min", "makeBinary(s, BinaryOp::RelaxedMinVecF64x2)"),
535535
("f64x2.relaxed_max", "makeBinary(s, BinaryOp::RelaxedMaxVecF64x2)"),
536-
("i16x8.relaxed_q15mulr_s", "makeBinary(s, BinaryOp::RelaxedQ15MulrSVecI16x8)"),
536+
("i16x8.relaxed_q15mulr_s", "makeBinary(s, BinaryOp::RelaxedQ15MulrSVecI16x8)"),
537+
("i16x8.dot_i8x16_i7x16_s", "makeBinary(s, BinaryOp::DotI8x16I7x16SToVecI16x8)"),
538+
("i16x8.dot_i8x16_i7x16_u", "makeBinary(s, BinaryOp::DotI8x16I7x16UToVecI16x8)"),
539+
("i32x4.dot_i8x16_i7x16_add_s", "makeSIMDTernary(s, SIMDTernaryOp::DotI8x16I7x16AddSToVecI32x4)"),
540+
("i32x4.dot_i8x16_i7x16_add_u", "makeSIMDTernary(s, SIMDTernaryOp::DotI8x16I7x16AddUToVecI32x4)"),
537541

538542
# reference types instructions
539543
("ref.null", "makeRefNull(s)"),

src/gen-s-parser.inc

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,17 @@ switch (op[0]) {
933933
case 'b':
934934
if (strcmp(op, "i16x8.bitmask") == 0) { return makeUnary(s, UnaryOp::BitmaskVecI16x8); }
935935
goto parse_error;
936+
case 'd': {
937+
switch (op[22]) {
938+
case 's':
939+
if (strcmp(op, "i16x8.dot_i8x16_i7x16_s") == 0) { return makeBinary(s, BinaryOp::DotI8x16I7x16SToVecI16x8); }
940+
goto parse_error;
941+
case 'u':
942+
if (strcmp(op, "i16x8.dot_i8x16_i7x16_u") == 0) { return makeBinary(s, BinaryOp::DotI8x16I7x16UToVecI16x8); }
943+
goto parse_error;
944+
default: goto parse_error;
945+
}
946+
}
936947
case 'e': {
937948
switch (op[7]) {
938949
case 'q':
@@ -1692,9 +1703,25 @@ switch (op[0]) {
16921703
case 'b':
16931704
if (strcmp(op, "i32x4.bitmask") == 0) { return makeUnary(s, UnaryOp::BitmaskVecI32x4); }
16941705
goto parse_error;
1695-
case 'd':
1696-
if (strcmp(op, "i32x4.dot_i16x8_s") == 0) { return makeBinary(s, BinaryOp::DotSVecI16x8ToVecI32x4); }
1697-
goto parse_error;
1706+
case 'd': {
1707+
switch (op[11]) {
1708+
case '1':
1709+
if (strcmp(op, "i32x4.dot_i16x8_s") == 0) { return makeBinary(s, BinaryOp::DotSVecI16x8ToVecI32x4); }
1710+
goto parse_error;
1711+
case '8': {
1712+
switch (op[26]) {
1713+
case 's':
1714+
if (strcmp(op, "i32x4.dot_i8x16_i7x16_add_s") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::DotI8x16I7x16AddSToVecI32x4); }
1715+
goto parse_error;
1716+
case 'u':
1717+
if (strcmp(op, "i32x4.dot_i8x16_i7x16_add_u") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::DotI8x16I7x16AddUToVecI32x4); }
1718+
goto parse_error;
1719+
default: goto parse_error;
1720+
}
1721+
}
1722+
default: goto parse_error;
1723+
}
1724+
}
16981725
case 'e': {
16991726
switch (op[7]) {
17001727
case 'q':

src/ir/cost.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
494494
case SwizzleVec8x16:
495495
case RelaxedSwizzleVec8x16:
496496
case RelaxedQ15MulrSVecI16x8:
497+
case DotI8x16I7x16SToVecI16x8:
498+
case DotI8x16I7x16UToVecI16x8:
497499
ret = 1;
498500
break;
499501
case InvalidBinary:
@@ -541,6 +543,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
541543
case RelaxedFmsVecF32x4:
542544
case RelaxedFmaVecF64x2:
543545
case RelaxedFmsVecF64x2:
546+
case DotI8x16I7x16AddSToVecI32x4:
547+
case DotI8x16I7x16AddUToVecI32x4:
544548
ret = 1;
545549
break;
546550
}

src/literal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,8 @@ class Literal {
572572
Literal minUI32x4(const Literal& other) const;
573573
Literal maxSI32x4(const Literal& other) const;
574574
Literal maxUI32x4(const Literal& other) const;
575+
Literal dotSI8x16toI16x8(const Literal& other) const;
576+
Literal dotUI8x16toI16x8(const Literal& other) const;
575577
Literal dotSI16x8toI32x4(const Literal& other) const;
576578
Literal extMulLowSI32x4(const Literal& other) const;
577579
Literal extMulHighSI32x4(const Literal& other) const;

src/passes/Print.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,12 @@ struct PrintExpressionContents
683683
case RelaxedFmsVecF64x2:
684684
o << "f64x2.relaxed_fms";
685685
break;
686+
case DotI8x16I7x16AddSToVecI32x4:
687+
o << "i32x4.dot_i8x16_i7x16_add_s";
688+
break;
689+
case DotI8x16I7x16AddUToVecI32x4:
690+
o << "i32x4.dot_i8x16_i7x16_add_u";
691+
break;
686692
}
687693
restoreNormalColor(o);
688694
}
@@ -1854,6 +1860,12 @@ struct PrintExpressionContents
18541860
case RelaxedQ15MulrSVecI16x8:
18551861
o << "i16x8.relaxed_q15mulr_s";
18561862
break;
1863+
case DotI8x16I7x16SToVecI16x8:
1864+
o << "i16x8.dot_i8x16_i7x16_s";
1865+
break;
1866+
case DotI8x16I7x16UToVecI16x8:
1867+
o << "i16x8.dot_i8x16_i7x16_u";
1868+
break;
18571869

18581870
case InvalidBinary:
18591871
WASM_UNREACHABLE("unvalid binary operator");

src/wasm-binary.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,10 @@ enum ASTNodes {
10381038
F64x2RelaxedMin = 0xd4,
10391039
F64x2RelaxedMax = 0xee,
10401040
I16x8RelaxedQ15MulrS = 0x111,
1041+
I16x8DotI8x16I7x16S = 0x112,
1042+
I16x8DotI8x16I7x16U = 0x113,
1043+
I32x4DotI8x16I7x16AddS = 0x114,
1044+
I32x4DotI8x16I7x16AddU = 0x115,
10411045

10421046
// bulk memory opcodes
10431047

src/wasm-interpreter.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,11 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
10161016
case RelaxedSwizzleVec8x16:
10171017
return left.swizzleI8x16(right);
10181018

1019+
case DotI8x16I7x16SToVecI16x8:
1020+
return left.dotSI8x16toI16x8(right);
1021+
case DotI8x16I7x16UToVecI16x8:
1022+
return left.dotUI8x16toI16x8(right);
1023+
10191024
case InvalidBinary:
10201025
WASM_UNREACHABLE("invalid binary op");
10211026
}
@@ -1124,7 +1129,7 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
11241129
case RelaxedFmsVecF64x2:
11251130
return a.relaxedFmsF64x2(b, c);
11261131
default:
1127-
// TODO: implement signselect
1132+
// TODO: implement signselect and dot_add
11281133
WASM_UNREACHABLE("not implemented");
11291134
}
11301135
}

src/wasm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,8 @@ enum BinaryOp {
473473
RelaxedMinVecF64x2,
474474
RelaxedMaxVecF64x2,
475475
RelaxedQ15MulrSVecI16x8,
476+
DotI8x16I7x16SToVecI16x8,
477+
DotI8x16I7x16UToVecI16x8,
476478

477479
InvalidBinary
478480
};
@@ -552,6 +554,8 @@ enum SIMDTernaryOp {
552554
LaneselectI16x8,
553555
LaneselectI32x4,
554556
LaneselectI64x2,
557+
DotI8x16I7x16AddSToVecI32x4,
558+
DotI8x16I7x16AddUToVecI32x4,
555559
};
556560

557561
enum RefIsOp {

src/wasm/literal.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,17 +2372,33 @@ Literal Literal::pmaxF64x2(const Literal& other) const {
23722372
return binary<2, &Literal::getLanesF64x2, &Literal::pmax>(*this, other);
23732373
}
23742374

2375-
Literal Literal::dotSI16x8toI32x4(const Literal& other) const {
2376-
LaneArray<8> lhs = getLanesSI16x8();
2377-
LaneArray<8> rhs = other.getLanesSI16x8();
2378-
LaneArray<4> result;
2379-
for (size_t i = 0; i < 4; ++i) {
2380-
result[i] = Literal(lhs[i * 2].geti32() * rhs[i * 2].geti32() +
2381-
lhs[i * 2 + 1].geti32() * rhs[i * 2 + 1].geti32());
2375+
// Generic lane-wise dot product shared by the Literal::dot* methods.
//   Lanes     - number of lanes in the result.
//   Factor    - number of adjacent input lanes that are multiplied pairwise
//               and summed into each result lane.
//   IntoLanes - Literal member function that splits a value into
//               Lanes * Factor lanes.
// Result lane i is the sum over j in [0, Factor) of
// lhs[i * Factor + j] * rhs[i * Factor + j], computed on the values returned
// by geti32().
// NOTE(review): the accumulation happens in whatever type geti32() returns
// (presumably int32_t). With signed 16-bit inputs, two products of
// -32768 * -32768 would exceed INT32_MAX -- confirm that overflow here is
// acceptable or cannot occur.
template<size_t Lanes,
2376+
size_t Factor,
2377+
LaneArray<Lanes * Factor> (Literal::*IntoLanes)() const>
2378+
static Literal dot(const Literal& left, const Literal& right) {
2379+
// Split both operands into their Lanes * Factor input lanes.
LaneArray<Lanes* Factor> lhs = (left.*IntoLanes)();
2380+
LaneArray<Lanes* Factor> rhs = (right.*IntoLanes)();
2381+
LaneArray<Lanes> result;
2382+
for (size_t i = 0; i < Lanes; ++i) {
2383+
// Start this result lane at zero, then add in Factor products.
result[i] = Literal(int32_t(0));
2384+
for (size_t j = 0; j < Factor; ++j) {
2385+
// Input lanes [i * Factor, i * Factor + Factor) feed result lane i.
result[i] = Literal(result[i].geti32() + lhs[i * Factor + j].geti32() *
2386+
rhs[i * Factor + j].geti32());
2387+
}
23822388
}
23832389
// Pack the per-lane results back into a single Literal.
return Literal(result);
23842390
}
23852391

2392+
// Dot product of *this and other using the 16 lanes produced by
// getLanesSI8x16 (signed 8-bit lanes, going by the getter's name): each of
// the 8 result lanes is the sum of 2 adjacent lane products (dot<8, 2>).
Literal Literal::dotSI8x16toI16x8(const Literal& other) const {
2393+
return dot<8, 2, &Literal::getLanesSI8x16>(*this, other);
2394+
}
2395+
// Dot product of *this and other using the 16 lanes produced by
// getLanesUI8x16 (unsigned 8-bit lanes, going by the getter's name): each of
// the 8 result lanes is the sum of 2 adjacent lane products (dot<8, 2>).
Literal Literal::dotUI8x16toI16x8(const Literal& other) const {
2396+
return dot<8, 2, &Literal::getLanesUI8x16>(*this, other);
2397+
}
2398+
// Dot product of *this and other using the 8 lanes produced by
// getLanesSI16x8 (signed 16-bit lanes, going by the getter's name): each of
// the 4 result lanes is the sum of 2 adjacent lane products (dot<4, 2>).
Literal Literal::dotSI16x8toI32x4(const Literal& other) const {
2399+
return dot<4, 2, &Literal::getLanesSI16x8>(*this, other);
2400+
}
2401+
23862402
Literal Literal::bitselectV128(const Literal& left,
23872403
const Literal& right) const {
23882404
return andV128(left).orV128(notV128().andV128(right));

src/wasm/wasm-binary.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5601,6 +5601,14 @@ bool WasmBinaryBuilder::maybeVisitSIMDBinary(Expression*& out, uint32_t code) {
56015601
curr = allocator.alloc<Binary>();
56025602
curr->op = RelaxedQ15MulrSVecI16x8;
56035603
break;
5604+
case BinaryConsts::I16x8DotI8x16I7x16S:
5605+
curr = allocator.alloc<Binary>();
5606+
curr->op = DotI8x16I7x16SToVecI16x8;
5607+
break;
5608+
case BinaryConsts::I16x8DotI8x16I7x16U:
5609+
curr = allocator.alloc<Binary>();
5610+
curr->op = DotI8x16I7x16UToVecI16x8;
5611+
break;
56045612
default:
56055613
return false;
56065614
}
@@ -6075,6 +6083,14 @@ bool WasmBinaryBuilder::maybeVisitSIMDTernary(Expression*& out, uint32_t code) {
60756083
curr = allocator.alloc<SIMDTernary>();
60766084
curr->op = RelaxedFmsVecF64x2;
60776085
break;
6086+
case BinaryConsts::I32x4DotI8x16I7x16AddS:
6087+
curr = allocator.alloc<SIMDTernary>();
6088+
curr->op = DotI8x16I7x16AddSToVecI32x4;
6089+
break;
6090+
case BinaryConsts::I32x4DotI8x16I7x16AddU:
6091+
curr = allocator.alloc<SIMDTernary>();
6092+
curr->op = DotI8x16I7x16AddUToVecI32x4;
6093+
break;
60786094
default:
60796095
return false;
60806096
}

src/wasm/wasm-stack.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,12 @@ void BinaryInstWriter::visitSIMDTernary(SIMDTernary* curr) {
569569
case RelaxedFmsVecF64x2:
570570
o << U32LEB(BinaryConsts::F64x2RelaxedFms);
571571
break;
572+
case DotI8x16I7x16AddSToVecI32x4:
573+
o << U32LEB(BinaryConsts::I32x4DotI8x16I7x16AddS);
574+
break;
575+
case DotI8x16I7x16AddUToVecI32x4:
576+
o << U32LEB(BinaryConsts::I32x4DotI8x16I7x16AddU);
577+
break;
572578
}
573579
}
574580

@@ -1846,6 +1852,14 @@ void BinaryInstWriter::visitBinary(Binary* curr) {
18461852
o << int8_t(BinaryConsts::SIMDPrefix)
18471853
<< U32LEB(BinaryConsts::I16x8RelaxedQ15MulrS);
18481854
break;
1855+
case DotI8x16I7x16SToVecI16x8:
1856+
o << int8_t(BinaryConsts::SIMDPrefix)
1857+
<< U32LEB(BinaryConsts::I16x8DotI8x16I7x16S);
1858+
break;
1859+
case DotI8x16I7x16UToVecI16x8:
1860+
o << int8_t(BinaryConsts::SIMDPrefix)
1861+
<< U32LEB(BinaryConsts::I16x8DotI8x16I7x16U);
1862+
break;
18491863

18501864
case InvalidBinary:
18511865
WASM_UNREACHABLE("invalid binary op");

src/wasm/wasm-validator.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1635,7 +1635,9 @@ void FunctionValidator::visitBinary(Binary* curr) {
16351635
case NarrowUVecI32x4ToVecI16x8:
16361636
case SwizzleVec8x16:
16371637
case RelaxedSwizzleVec8x16:
1638-
case RelaxedQ15MulrSVecI16x8: {
1638+
case RelaxedQ15MulrSVecI16x8:
1639+
case DotI8x16I7x16SToVecI16x8:
1640+
case DotI8x16I7x16UToVecI16x8: {
16391641
shouldBeEqualOrFirstIsUnreachable(
16401642
curr->left->type, Type(Type::v128), curr, "v128 op");
16411643
shouldBeEqualOrFirstIsUnreachable(

test/lit/relaxed-simd.wast

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,88 @@
369369
)
370370
)
371371

372+
;; CHECK-BINARY: (func $i16x8.dot_i8x16_i7x16_s (param $0 v128) (param $1 v128) (result v128)
373+
;; CHECK-BINARY-NEXT: (i16x8.dot_i8x16_i7x16_s
374+
;; CHECK-BINARY-NEXT: (local.get $0)
375+
;; CHECK-BINARY-NEXT: (local.get $1)
376+
;; CHECK-BINARY-NEXT: )
377+
;; CHECK-BINARY-NEXT: )
378+
;; CHECK-TEXT: (func $i16x8.dot_i8x16_i7x16_s (param $0 v128) (param $1 v128) (result v128)
379+
;; CHECK-TEXT-NEXT: (i16x8.dot_i8x16_i7x16_s
380+
;; CHECK-TEXT-NEXT: (local.get $0)
381+
;; CHECK-TEXT-NEXT: (local.get $1)
382+
;; CHECK-TEXT-NEXT: )
383+
;; CHECK-TEXT-NEXT: )
384+
(func $i16x8.dot_i8x16_i7x16_s (param $0 v128) (param $1 v128) (result v128)
385+
(i16x8.dot_i8x16_i7x16_s
386+
(local.get $0)
387+
(local.get $1)
388+
)
389+
)
390+
391+
;; CHECK-BINARY: (func $i16x8.dot_i8x16_i7x16_u (param $0 v128) (param $1 v128) (result v128)
392+
;; CHECK-BINARY-NEXT: (i16x8.dot_i8x16_i7x16_u
393+
;; CHECK-BINARY-NEXT: (local.get $0)
394+
;; CHECK-BINARY-NEXT: (local.get $1)
395+
;; CHECK-BINARY-NEXT: )
396+
;; CHECK-BINARY-NEXT: )
397+
;; CHECK-TEXT: (func $i16x8.dot_i8x16_i7x16_u (param $0 v128) (param $1 v128) (result v128)
398+
;; CHECK-TEXT-NEXT: (i16x8.dot_i8x16_i7x16_u
399+
;; CHECK-TEXT-NEXT: (local.get $0)
400+
;; CHECK-TEXT-NEXT: (local.get $1)
401+
;; CHECK-TEXT-NEXT: )
402+
;; CHECK-TEXT-NEXT: )
403+
(func $i16x8.dot_i8x16_i7x16_u (param $0 v128) (param $1 v128) (result v128)
404+
(i16x8.dot_i8x16_i7x16_u
405+
(local.get $0)
406+
(local.get $1)
407+
)
408+
)
409+
410+
;; CHECK-BINARY: (func $i32x4.dot_i8x16_i7x16_add_s (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
411+
;; CHECK-BINARY-NEXT: (i32x4.dot_i8x16_i7x16_add_s
412+
;; CHECK-BINARY-NEXT: (local.get $0)
413+
;; CHECK-BINARY-NEXT: (local.get $1)
414+
;; CHECK-BINARY-NEXT: (local.get $2)
415+
;; CHECK-BINARY-NEXT: )
416+
;; CHECK-BINARY-NEXT: )
417+
;; CHECK-TEXT: (func $i32x4.dot_i8x16_i7x16_add_s (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
418+
;; CHECK-TEXT-NEXT: (i32x4.dot_i8x16_i7x16_add_s
419+
;; CHECK-TEXT-NEXT: (local.get $0)
420+
;; CHECK-TEXT-NEXT: (local.get $1)
421+
;; CHECK-TEXT-NEXT: (local.get $2)
422+
;; CHECK-TEXT-NEXT: )
423+
;; CHECK-TEXT-NEXT: )
424+
(func $i32x4.dot_i8x16_i7x16_add_s (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
425+
(i32x4.dot_i8x16_i7x16_add_s
426+
(local.get $0)
427+
(local.get $1)
428+
(local.get $2)
429+
)
430+
)
431+
432+
;; CHECK-BINARY: (func $i32x4.dot_i8x16_i7x16_add_u (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
433+
;; CHECK-BINARY-NEXT: (i32x4.dot_i8x16_i7x16_add_u
434+
;; CHECK-BINARY-NEXT: (local.get $0)
435+
;; CHECK-BINARY-NEXT: (local.get $1)
436+
;; CHECK-BINARY-NEXT: (local.get $2)
437+
;; CHECK-BINARY-NEXT: )
438+
;; CHECK-BINARY-NEXT: )
439+
;; CHECK-TEXT: (func $i32x4.dot_i8x16_i7x16_add_u (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
440+
;; CHECK-TEXT-NEXT: (i32x4.dot_i8x16_i7x16_add_u
441+
;; CHECK-TEXT-NEXT: (local.get $0)
442+
;; CHECK-TEXT-NEXT: (local.get $1)
443+
;; CHECK-TEXT-NEXT: (local.get $2)
444+
;; CHECK-TEXT-NEXT: )
445+
;; CHECK-TEXT-NEXT: )
446+
(func $i32x4.dot_i8x16_i7x16_add_u (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
447+
(i32x4.dot_i8x16_i7x16_add_u
448+
(local.get $0)
449+
(local.get $1)
450+
(local.get $2)
451+
)
452+
)
453+
372454
)
373455
;; CHECK-NODEBUG: (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128)))
374456

@@ -507,3 +589,33 @@
507589
;; CHECK-NODEBUG-NEXT: (local.get $1)
508590
;; CHECK-NODEBUG-NEXT: )
509591
;; CHECK-NODEBUG-NEXT: )
592+
593+
;; CHECK-NODEBUG: (func $18 (param $0 v128) (param $1 v128) (result v128)
594+
;; CHECK-NODEBUG-NEXT: (i16x8.dot_i8x16_i7x16_s
595+
;; CHECK-NODEBUG-NEXT: (local.get $0)
596+
;; CHECK-NODEBUG-NEXT: (local.get $1)
597+
;; CHECK-NODEBUG-NEXT: )
598+
;; CHECK-NODEBUG-NEXT: )
599+
600+
;; CHECK-NODEBUG: (func $19 (param $0 v128) (param $1 v128) (result v128)
601+
;; CHECK-NODEBUG-NEXT: (i16x8.dot_i8x16_i7x16_u
602+
;; CHECK-NODEBUG-NEXT: (local.get $0)
603+
;; CHECK-NODEBUG-NEXT: (local.get $1)
604+
;; CHECK-NODEBUG-NEXT: )
605+
;; CHECK-NODEBUG-NEXT: )
606+
607+
;; CHECK-NODEBUG: (func $20 (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
608+
;; CHECK-NODEBUG-NEXT: (i32x4.dot_i8x16_i7x16_add_s
609+
;; CHECK-NODEBUG-NEXT: (local.get $0)
610+
;; CHECK-NODEBUG-NEXT: (local.get $1)
611+
;; CHECK-NODEBUG-NEXT: (local.get $2)
612+
;; CHECK-NODEBUG-NEXT: )
613+
;; CHECK-NODEBUG-NEXT: )
614+
615+
;; CHECK-NODEBUG: (func $21 (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
616+
;; CHECK-NODEBUG-NEXT: (i32x4.dot_i8x16_i7x16_add_u
617+
;; CHECK-NODEBUG-NEXT: (local.get $0)
618+
;; CHECK-NODEBUG-NEXT: (local.get $1)
619+
;; CHECK-NODEBUG-NEXT: (local.get $2)
620+
;; CHECK-NODEBUG-NEXT: )
621+
;; CHECK-NODEBUG-NEXT: )

0 commit comments

Comments
 (0)