gpu: jit: do not rewrite 64-bit exprs after overflow fix pass

echeresh · echeresh · commit 12d57430c66e · 2023-04-14T11:34:43.000-07:00
diff --git a/src/gpu/jit/conv/ir_builder.cpp b/src/gpu/jit/conv/ir_builder.cpp
@@ -755,6 +755,7 @@ void conv_ir_builder_t::build() {
             cfg_.reserved_regs());
     stmt_ = split_shuffle(stmt_, ir_ctx);
     stmt_ = fixup_if_conditions(stmt_, ir_ctx);
+    stmt_ = optimize_int64_exprs(stmt_, ir_ctx);
     stmt_ = fix_int32_overflow(stmt_, ir_ctx);
     stmt_ = eliminate_common_subexprs(
             stmt_, ir_ctx, cfg_.reserved_regs(), cfg_.slm().gmem_bufs());
diff --git a/src/gpu/jit/pass/pass.cpp b/src/gpu/jit/pass/pass.cpp
@@ -18,6 +18,7 @@
 
 #include "gpu/jit/ir/message.hpp"
 #include "gpu/jit/ir/reorder.hpp"
+#include "gpu/jit/pass/simplify.hpp"
 #include "gpu/jit/utils/trace.hpp"
 
 namespace dnnl {
@@ -176,6 +177,35 @@ stmt_t fixup_if_conditions(const stmt_t &s, ir_context_t &ir_ctx) {
     return ret;
 }
 
+class int64_expr_optimizer_t : public ir_mutator_t { // IR mutator pass
+public:
+#define HANDLE_IR_OBJECT(type) \
+    object_t _mutate(const type &obj) override { return mutate_expr(obj); }
+
+    HANDLE_EXPR_IR_OBJECTS() // presumably one override per expr type
+
+#undef HANDLE_IR_OBJECT
+
+private:
+    template <typename T>
+    object_t mutate_expr(const T &obj) { // shared body of the overrides
+        auto new_obj = ir_mutator_t::_mutate(obj); // base mutation first
+        if (auto *binary = new_obj.template as_ptr<binary_op_t>()) {
+            if (binary->op_kind == op_kind_t::_add) { // additions only
+                new_obj = simplify_64_bit_add(new_obj); // reorder operands
+            }
+        }
+        return new_obj;
+    }
+};
+
+stmt_t optimize_int64_exprs(const stmt_t &s, ir_context_t &ir_ctx) {
+    trace_start();
+    auto ret = int64_expr_optimizer_t().mutate(s); // s is left unmodified
+    trace_pass("optimize_int64_exprs", ret, ir_ctx); // traced by pass name
+    return ret; // the rewritten statement
+}
+
 } // namespace jit
 } // namespace gpu
 } // namespace impl
diff --git a/src/gpu/jit/pass/pass.hpp b/src/gpu/jit/pass/pass.hpp
@@ -57,6 +57,10 @@ stmt_t split_wide_stores(const stmt_t &s, ir_context_t &ir_ctx);
 //     if (bcast8(cond)) { ... }
 stmt_t fixup_if_conditions(const stmt_t &s, ir_context_t &ir_ctx);
 
+// Rewrites mixed 64-bit/32-bit expressions to reduce 64-bit arithmetic.
+// Potential overflow is ignored and must be checked/fixed by further passes.
+stmt_t optimize_int64_exprs(const stmt_t &s, ir_context_t &ir_ctx);
+
 } // namespace jit
 } // namespace gpu
 } // namespace impl
diff --git a/src/gpu/jit/pass/simplify.cpp b/src/gpu/jit/pass/simplify.cpp
@@ -1521,18 +1521,6 @@ expr_t reorder_nary_add_args(const expr_t &e, bool x64_first) {
     return nary_op_t::make(nary_op->op_kind, new_args);
 }
 
-// Rewrites addition with mixed 64-bit/32-bit expressions to reduce 64-bit
-// arithmetic. Example:
-// Before: ((x.s64 + y.s32) + z.s32) [two 64-bit add]
-// After:  ((y.s32 + z.s32) + x.s64) [one 32-bit add and one 64-bit add]
-class _64_bit_add_optimizer_t : public nary_op_mutator_t {
-public:
-    object_t _mutate(const nary_op_t &obj) override {
-        auto new_obj = nary_op_mutator_t::_mutate(obj);
-        return reorder_nary_add_args(new_obj, /*x64_first=*/false);
-    }
-};
-
 // Simplifies using the N-ary form.
 expr_t simplify_with_nary(const expr_t &_e, const constraint_set_t &cset) {
     auto e = _e;
@@ -1545,13 +1533,30 @@ expr_t simplify_with_nary(const expr_t &_e, const constraint_set_t &cset) {
     e = int_div_mod_expander_t(cset).mutate(e);
     e = common_factor_simplifier_t().mutate(e);
     e = int_div_mod_range_simplifier_t(cset).mutate(e);
-    e = _64_bit_add_optimizer_t().mutate(e);
 
     e = nary_op_back_transform(e);
 
     return e;
 }
 
+class _64_bit_add_optimizer_t : public nary_op_mutator_t {
+public:
+    object_t _mutate(const nary_op_t &obj) override { // any nary_op_t node
+        auto new_obj = nary_op_mutator_t::_mutate(obj); // base mutation first
+        return reorder_nary_add_args(new_obj, /*x64_first=*/false);
+    }
+};
+
+expr_t simplify_64_bit_add(const expr_t &_e) {
+    auto e = _e; // working copy; the argument is const
+
+    e = nary_op_canonicalize(e); // presumably converts to N-ary form
+    e = _64_bit_add_optimizer_t().mutate(e); // reorder add arguments
+    e = nary_op_back_transform(e); // presumably back from N-ary form
+
+    return e;
+}
+
 class stmt_simplifier_t : public ir_mutator_t {
 public:
     stmt_simplifier_t(const constraint_set_t &cset) : cset_(cset) {}
diff --git a/src/gpu/jit/pass/simplify.hpp b/src/gpu/jit/pass/simplify.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2022 Intel Corporation
+* Copyright 2022-2023 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -43,6 +43,12 @@ expr_t simplify_rewrite_with_ternary(const expr_t &e, bool recursive = true);
 // Example: (c0 + x) op c1 -> x op (c1 - c0)
 expr_t simplify_cmp_move_const_to_rhs(const expr_t &e);
 
+// Rewrites addition with mixed 64-bit/32-bit expressions to reduce 64-bit
+// arithmetic. Example:
+// Before: ((x.s64 + y.s32) + z.s32) [two 64-bit add]
+// After:  ((y.s32 + z.s32) + x.s64) [one 32-bit add and one 64-bit add]
+expr_t simplify_64_bit_add(const expr_t &e);
+
 // Reduces left and right hand sides of an expression.
 // Example: A * x < A * B -> x < B (if A > 0).
 expr_t simplify_cmp_reduce_lhs_rhs(const expr_t &e);