Skip to content

Commit 565d952

Browse files
committed
Add i32x4.dot_i16x8_s
This experimental instruction is specified in WebAssembly/simd#127 and is being implemented to enable further investigation of its performance impact.
1 parent 0a5925a commit 565d952

27 files changed

+845
-709
lines changed

build-js.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ export_function "_BinaryenMinSVecI32x4"
526526
export_function "_BinaryenMinUVecI32x4"
527527
export_function "_BinaryenMaxSVecI32x4"
528528
export_function "_BinaryenMaxUVecI32x4"
529+
export_function "_BinaryenDotSVecI16x8ToVecI32x4"
529530
export_function "_BinaryenNegVecI64x2"
530531
export_function "_BinaryenAnyTrueVecI64x2"
531532
export_function "_BinaryenAllTrueVecI64x2"

scripts/gen-s-parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@
405405
("i32x4.min_u", "makeBinary(s, BinaryOp::MinUVecI32x4)"),
406406
("i32x4.max_s", "makeBinary(s, BinaryOp::MaxSVecI32x4)"),
407407
("i32x4.max_u", "makeBinary(s, BinaryOp::MaxUVecI32x4)"),
408+
("i32x4.dot_i16x8_s", "makeBinary(s, BinaryOp::DotSVecI16x8ToVecI32x4)"),
408409
("i64x2.neg", "makeUnary(s, UnaryOp::NegVecI64x2)"),
409410
("i64x2.any_true", "makeUnary(s, UnaryOp::AnyTrueVecI64x2)"),
410411
("i64x2.all_true", "makeUnary(s, UnaryOp::AllTrueVecI64x2)"),

src/binaryen-c.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,9 @@ BinaryenOp BinaryenMinSVecI32x4(void) { return MinSVecI32x4; }
842842
BinaryenOp BinaryenMinUVecI32x4(void) { return MinUVecI32x4; }
843843
BinaryenOp BinaryenMaxSVecI32x4(void) { return MaxSVecI32x4; }
844844
BinaryenOp BinaryenMaxUVecI32x4(void) { return MaxUVecI32x4; }
845+
// C API accessor: yields the operator id of the experimental
// i32x4.dot_i16x8_s binary instruction (DotSVecI16x8ToVecI32x4).
BinaryenOp BinaryenDotSVecI16x8ToVecI32x4(void) {
  const BinaryenOp dotOp = DotSVecI16x8ToVecI32x4;
  return dotOp;
}
845848
BinaryenOp BinaryenNegVecI64x2(void) { return NegVecI64x2; }
846849
BinaryenOp BinaryenAnyTrueVecI64x2(void) { return AnyTrueVecI64x2; }
847850
BinaryenOp BinaryenAllTrueVecI64x2(void) { return AllTrueVecI64x2; }

src/binaryen-c.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,7 @@ BINARYEN_API BinaryenOp BinaryenMinSVecI32x4(void);
504504
BINARYEN_API BinaryenOp BinaryenMinUVecI32x4(void);
505505
BINARYEN_API BinaryenOp BinaryenMaxSVecI32x4(void);
506506
BINARYEN_API BinaryenOp BinaryenMaxUVecI32x4(void);
507+
BINARYEN_API BinaryenOp BinaryenDotSVecI16x8ToVecI32x4(void);
507508
BINARYEN_API BinaryenOp BinaryenNegVecI64x2(void);
508509
BINARYEN_API BinaryenOp BinaryenAnyTrueVecI64x2(void);
509510
BINARYEN_API BinaryenOp BinaryenAllTrueVecI64x2(void);

src/gen-s-parser.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,9 @@ switch (op[0]) {
14091409
default: goto parse_error;
14101410
}
14111411
}
1412+
case 'd':
1413+
if (strcmp(op, "i32x4.dot_i16x8_s") == 0) { return makeBinary(s, BinaryOp::DotSVecI16x8ToVecI32x4); }
1414+
goto parse_error;
14121415
case 'e': {
14131416
switch (op[7]) {
14141417
case 'q':

src/ir/cost.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,9 @@ struct CostAnalyzer : public Visitor<CostAnalyzer, Index> {
648648
case MaxUVecI32x4:
649649
ret = 1;
650650
break;
651+
case DotSVecI16x8ToVecI32x4:
652+
ret = 1;
653+
break;
651654
case AddVecI64x2:
652655
ret = 1;
653656
break;

src/js/binaryen.js-post.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ Module['MinSVecI16x8'] = Module['_BinaryenMinSVecI16x8']();
357357
Module['MinUVecI16x8'] = Module['_BinaryenMinUVecI16x8']();
358358
Module['MaxSVecI16x8'] = Module['_BinaryenMaxSVecI16x8']();
359359
Module['MaxUVecI16x8'] = Module['_BinaryenMaxUVecI16x8']();
360+
Module['DotSVecI16x8ToVecI32x4'] = Module['_BinaryenDotSVecI16x8ToVecI32x4']();
360361
Module['NegVecI32x4'] = Module['_BinaryenNegVecI32x4']();
361362
Module['AnyTrueVecI32x4'] = Module['_BinaryenAnyTrueVecI32x4']();
362363
Module['AllTrueVecI32x4'] = Module['_BinaryenAllTrueVecI32x4']();
@@ -1676,6 +1677,9 @@ function wrapModule(module, self) {
16761677
'max_u': function(left, right) {
16771678
return Module['_BinaryenBinary'](module, Module['MaxUVecI32x4'], left, right);
16781679
},
1680+
'dot_i16x8_s': function(left, right) {
1681+
return Module['_BinaryenBinary'](module, Module['DotSVecI16x8ToVecI32x4'], left, right);
1682+
},
16791683
'trunc_sat_f32x4_s': function(value) {
16801684
return Module['_BinaryenUnary'](module, Module['TruncSatSVecF32x4ToVecI32x4'], value);
16811685
},

src/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ class Literal {
364364
Literal minUI32x4(const Literal& other) const;
365365
Literal maxSI32x4(const Literal& other) const;
366366
Literal maxUI32x4(const Literal& other) const;
367+
Literal dotSI16x8toI32x4(const Literal& other) const;
367368
Literal negI64x2() const;
368369
Literal anyTrueI64x2() const;
369370
Literal allTrueI64x2() const;

src/passes/Print.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,9 @@ struct PrintExpressionContents
12221222
case MaxUVecI32x4:
12231223
o << "i32x4.max_u";
12241224
break;
1225+
case DotSVecI16x8ToVecI32x4:
1226+
o << "i32x4.dot_i16x8_s";
1227+
break;
12251228
case AddVecI64x2:
12261229
o << "i64x2.add";
12271230
break;

src/tools/fuzzing.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,6 +2169,7 @@ class TranslateToFuzzReader {
21692169
MinUVecI32x4,
21702170
MaxSVecI32x4,
21712171
MaxUVecI32x4,
2172+
DotSVecI16x8ToVecI32x4,
21722173
AddVecI64x2,
21732174
SubVecI64x2,
21742175
AddVecF32x4,

src/wasm-binary.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,7 @@ enum ASTNodes {
831831
I32x4MinU = 0x81,
832832
I32x4MaxS = 0x82,
833833
I32x4MaxU = 0x83,
834+
I32x4DotSVecI16x8 = 0xd9,
834835
I64x2Neg = 0x84,
835836
I64x2AnyTrue = 0x85,
836837
I64x2AllTrue = 0x86,

src/wasm-interpreter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,8 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
801801
return left.maxSI32x4(right);
802802
case MaxUVecI32x4:
803803
return left.maxUI32x4(right);
804+
case DotSVecI16x8ToVecI32x4:
805+
return left.dotSI16x8toI32x4(right);
804806
case AddVecI64x2:
805807
return left.addI64x2(right);
806808
case SubVecI64x2:

src/wasm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ enum BinaryOp {
383383
MinUVecI32x4,
384384
MaxSVecI32x4,
385385
MaxUVecI32x4,
386+
DotSVecI16x8ToVecI32x4,
386387
AddVecI64x2,
387388
SubVecI64x2,
388389
AddVecF32x4,

src/wasm/literal.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1829,6 +1829,17 @@ Literal Literal::maxF64x2(const Literal& other) const {
18291829
return binary<2, &Literal::getLanesF64x2, &Literal::max>(*this, other);
18301830
}
18311831

1832+
// Evaluates i32x4.dot_i16x8_s on two vector literals.
//
// Both operands are split into 8 lanes via getLanesSI16x8(). Output lane i
// (0 <= i < 4) is
//
//   lhs[2i] * rhs[2i] + lhs[2i + 1] * rhs[2i + 1]
//
// computed with wrapping 32-bit arithmetic.
//
// @param other the right-hand operand; `*this` is the left-hand operand.
// @return a 4-lane vector literal holding the pairwise dot products.
Literal Literal::dotSI16x8toI32x4(const Literal& other) const {
  LaneArray<8> lhs = getLanesSI16x8();
  LaneArray<8> rhs = other.getLanesSI16x8();
  LaneArray<4> result;
  for (size_t i = 0; i < 4; ++i) {
    // Do the arithmetic in uint32_t rather than int32_t. With sign-extended
    // 16-bit inputs a single product can be as large as 2^30, so the sum of
    // two products can reach 2^31 (e.g. all four inputs equal to -32768),
    // which overflows int32_t. Signed overflow is undefined behaviour in C++;
    // unsigned arithmetic wraps modulo 2^32 and yields the same bit pattern
    // whenever the signed computation would have been well defined.
    const uint32_t evenProduct = static_cast<uint32_t>(lhs[i * 2].geti32()) *
                                 static_cast<uint32_t>(rhs[i * 2].geti32());
    const uint32_t oddProduct =
      static_cast<uint32_t>(lhs[i * 2 + 1].geti32()) *
      static_cast<uint32_t>(rhs[i * 2 + 1].geti32());
    result[i] = Literal(static_cast<int32_t>(evenProduct + oddProduct));
  }
  return Literal(result);
}
1842+
18321843
Literal Literal::bitselectV128(const Literal& left,
18331844
const Literal& right) const {
18341845
return andV128(left).orV128(notV128().andV128(right));

src/wasm/wasm-binary.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3939,6 +3939,10 @@ bool WasmBinaryBuilder::maybeVisitSIMDBinary(Expression*& out, uint32_t code) {
39393939
curr = allocator.alloc<Binary>();
39403940
curr->op = MaxUVecI32x4;
39413941
break;
3942+
case BinaryConsts::I32x4DotSVecI16x8:
3943+
curr = allocator.alloc<Binary>();
3944+
curr->op = DotSVecI16x8ToVecI32x4;
3945+
break;
39423946
case BinaryConsts::I64x2Add:
39433947
curr = allocator.alloc<Binary>();
39443948
curr->op = AddVecI64x2;

src/wasm/wasm-stack.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,6 +1459,10 @@ void BinaryInstWriter::visitBinary(Binary* curr) {
14591459
case MaxUVecI32x4:
14601460
o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I32x4MaxU);
14611461
break;
1462+
case DotSVecI16x8ToVecI32x4:
1463+
o << int8_t(BinaryConsts::SIMDPrefix)
1464+
<< U32LEB(BinaryConsts::I32x4DotSVecI16x8);
1465+
break;
14621466
case AddVecI64x2:
14631467
o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I64x2Add);
14641468
break;

src/wasm/wasm-validator.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,6 +1361,7 @@ void FunctionValidator::visitBinary(Binary* curr) {
13611361
case MinUVecI32x4:
13621362
case MaxSVecI32x4:
13631363
case MaxUVecI32x4:
1364+
case DotSVecI16x8ToVecI32x4:
13641365
case AddVecI64x2:
13651366
case SubVecI64x2:
13661367
case AddVecF32x4:

test/binaryen.js/kitchen-sink.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ function test_core() {
347347
module.i32x4.min_u(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
348348
module.i32x4.max_s(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
349349
module.i32x4.max_u(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
350+
module.i32x4.dot_i16x8_s(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
350351
module.i64x2.add(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
351352
module.i64x2.sub(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),
352353
module.f32x4.add(module.v128.const(v128_bytes), module.v128.const(v128_bytes)),

0 commit comments

Comments
 (0)