File tree Expand file tree Collapse file tree 1 file changed +19
-0
lines changed
Expand file tree Collapse file tree 1 file changed +19
-0
lines changed Original file line number Diff line number Diff line change @@ -1644,6 +1644,25 @@ walk_order_t compute_walk_order(const config_t &cfg) {
16441644 size_t ab_bytes = get_memory_footprint (cfg, inner, outer);
16451645 if (ab_bytes <= l3_size) grid_inner = std::move (outer);
16461646 }
1647+
1648+ // Prefer square spatial dimensions: they increase the cache reuse that
1649+ // comes from iterating over the kernel spatial dimensions.
1650+ if (cfg.prb ().is_fwd && cfg.loop_dim (pvars::kh) > 1 && cfg.prb ().sh == 1 ) {
1651+ auto &w_inner = grid_inner[pvars::ow];
1652+ auto &h_inner = grid_inner[pvars::oh];
1653+ auto rebalance_spatial = [&]() { // Returns true while one more "halve w_inner, double h_inner" step is allowed.
1654+ if (grid_tile[pvars::oh] % (h_inner * 2 )) return false ; // Doubled h_inner must still evenly divide the oh grid tile.
1655+ if (w_inner % 2 ) return false ; // w_inner must be even so that halving it is exact.
1656+ if (w_inner < h_inner * 4 ) return false ; // Stop once a step would make h_inner exceed w_inner (w/2 >= 2*h must hold).
1657+ return true ;
1658+ };
1659+
1660+ while (rebalance_spatial ()) {
1661+ w_inner /= 2 ;
1662+ h_inner *= 2 ;
1663+ }
1664+ }
1665+
16471666 // Add the blocks in this order:
16481667 // - Step 1. Add grid_inner blocks (fitting L3 cache)
16491668 // - Step 2. Add the remaining M/N blocks
You can’t perform that action at this time.
0 commit comments