@@ -287,18 +287,18 @@ enum {
287287#define RVOP_RUN_NEXT (!ir->tailcall)
288288#endif
289289
290- #define RVOP (inst , code ) \
291- static bool do_##inst(riscv_t *rv UNUSED , const rv_insn_t *ir UNUSED ) \
292- { \
293- rv->X[rv_reg_zero] = 0; \
294- code; \
295- rv->csr_cycle++; \
296- nextop: \
297- rv->PC += ir->insn_len; \
298- if (!RVOP_RUN_NEXT) \
299- return true; \
300- const rv_insn_t *next = ir + 1; \
301- MUST_TAIL return next->impl(rv, next); \
/* Generate the handler do_<inst> for one decoded instruction.
 *
 * The generated function:
 *   1. forces x0 back to zero and bumps the cycle counter,
 *   2. runs the instruction body passed as `code`,
 *   3. advances PC by the length of this instruction,
 *   4. returns true when RVOP_RUN_NEXT is false, otherwise tail-calls the
 *      handler stored in the following IR entry.
 *
 * The `nextop` label sits just before the PC update, so a body may jump to
 * it to skip the remainder of `code`.
 */
#define RVOP(inst, code)                                    \
    static bool do_##inst(riscv_t *rv, const rv_insn_t *ir) \
    {                                                       \
        rv->X[rv_reg_zero] = 0;                             \
        rv->csr_cycle++;                                    \
        code;                                               \
    nextop:                                                 \
        rv->PC += ir->insn_len;                             \
        if (!RVOP_RUN_NEXT)                                 \
            return true;                                    \
        /* chain into the handler of the next IR entry */   \
        const rv_insn_t *succ = ir + 1;                     \
        MUST_TAIL return succ->impl(rv, succ);              \
    }
303303
304304/* RV32I Base Instruction Set */
@@ -1277,6 +1277,48 @@ RVOP(cswsp, {
12771277})
12781278#endif
12791279
1280+ /* auipc + addi */
1281+ RVOP (fuse1 , {
1282+ rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 );
1283+ rv -> PC += ir -> insn_len ;
1284+ })
1285+
1286+ /* auipc + add */
1287+ RVOP (fuse2 , {
1288+ rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1289+ rv -> PC += ir -> insn_len ;
1290+ })
1291+
1292+ /* multiple sw */
1293+ RVOP (fuse3 , {
1294+ mem_fuse_t * mem_fuse = ir -> mem_fuse ;
1295+ for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
1296+ const uint32_t addr = rv -> X [mem_fuse [i ].rs1 ] + mem_fuse [i ].imm ;
1297+ RV_EXC_MISALIGN_HANDLER (3 , store , false, 1 );
1298+ rv -> io .mem_write_w (rv , addr , rv -> X [mem_fuse [i ].rs2 ]);
1299+ }
1300+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1301+ })
1302+
1303+ /* multiple lw */
1304+ RVOP (fuse4 , {
1305+ mem_fuse_t * mem_fuse = ir -> mem_fuse ;
1306+ for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
1307+ const uint32_t addr = rv -> X [mem_fuse [i ].rs1 ] + mem_fuse [i ].imm ;
1308+ RV_EXC_MISALIGN_HANDLER (3 , load , false, 1 );
1309+ rv -> X [mem_fuse [i ].rd ] = rv -> io .mem_read_w (rv , addr );
1310+ }
1311+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1312+ })
1313+
1314+ static bool do_empty (riscv_t * rv , const rv_insn_t * ir )
1315+ {
1316+ rv -> X [rv_reg_zero ] = 0 ;
1317+ rv -> csr_cycle ++ ;
1318+ const rv_insn_t * next = ir + 1 ;
1319+ MUST_TAIL return next -> impl (rv , next );
1320+ }
1321+
12801322static const void * dispatch_table [] = {
12811323#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
12821324 RISCV_INSN_LIST
@@ -1407,6 +1449,92 @@ static void extend_block(riscv_t *rv, block_t *block)
14071449 last_ir -> branch_untaken = next -> ir ;
14081450}
14091451
1452+ static void match_pattern (block_t * block )
1453+ {
1454+ for (uint32_t i = 0 ; i < block -> n_insn - 1 ; i ++ ) {
1455+ rv_insn_t * ir = block -> ir + i , * next_ir = NULL ;
1456+ int32_t count = 0 ;
1457+ switch (ir -> opcode ) {
1458+ case rv_insn_auipc :
1459+ next_ir = ir + 1 ;
1460+ if (next_ir -> opcode == rv_insn_addi ) {
1461+ if (ir -> rd == next_ir -> rs1 ) {
1462+ ir -> opcode = rv_insn_fuse1 ;
1463+ ir -> rd = next_ir -> rd ;
1464+ ir -> imm2 = next_ir -> imm ;
1465+ ir -> impl = dispatch_table [ir -> opcode ];
1466+ next_ir -> opcode = rv_insn_empty ;
1467+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1468+ } else if (ir -> rd == next_ir -> rs2 ) {
1469+ ir -> opcode = rv_insn_fuse2 ;
1470+ ir -> rd = next_ir -> rd ;
1471+ ir -> rs1 = next_ir -> rs1 ;
1472+ ir -> impl = dispatch_table [ir -> opcode ];
1473+ next_ir -> opcode = rv_insn_empty ;
1474+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1475+ }
1476+ }
1477+ break ;
1478+ case rv_insn_sw :
1479+ count = 1 ;
1480+ for (uint32_t j = 1 ; j < block -> n_insn - 1 - i ; j ++ ) {
1481+ next_ir = ir + j ;
1482+ if (next_ir -> opcode != rv_insn_sw )
1483+ break ;
1484+ count ++ ;
1485+ }
1486+ if (count >= 5 ) {
1487+ ir -> opcode = rv_insn_fuse3 ;
1488+ ir -> mem_fuse = malloc (count * sizeof (mem_fuse_t ));
1489+ ir -> imm2 = count ;
1490+ ir -> mem_fuse [0 ].imm = ir -> imm ;
1491+ ir -> mem_fuse [0 ].rd = ir -> rd ;
1492+ ir -> mem_fuse [0 ].rs1 = ir -> rs1 ;
1493+ ir -> mem_fuse [0 ].rs2 = ir -> rs2 ;
1494+ ir -> impl = dispatch_table [ir -> opcode ];
1495+ for (int j = 1 ; j < count ; j ++ ) {
1496+ next_ir = ir + j ;
1497+ ir -> mem_fuse [j ].imm = next_ir -> imm ;
1498+ ir -> mem_fuse [j ].rd = next_ir -> rd ;
1499+ ir -> mem_fuse [j ].rs1 = next_ir -> rs1 ;
1500+ ir -> mem_fuse [j ].rs2 = next_ir -> rs2 ;
1501+ next_ir -> opcode = rv_insn_empty ;
1502+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1503+ }
1504+ }
1505+ break ;
1506+ case rv_insn_lw :
1507+ count = 1 ;
1508+ for (uint32_t j = 1 ; j < block -> n_insn - 1 - i ; j ++ ) {
1509+ next_ir = ir + j ;
1510+ if (next_ir -> opcode != rv_insn_lw )
1511+ break ;
1512+ count ++ ;
1513+ }
1514+ if (count >= 5 ) {
1515+ ir -> opcode = rv_insn_fuse4 ;
1516+ ir -> mem_fuse = malloc (count * sizeof (mem_fuse_t ));
1517+ ir -> imm2 = count ;
1518+ ir -> mem_fuse [0 ].imm = ir -> imm ;
1519+ ir -> mem_fuse [0 ].rd = ir -> rd ;
1520+ ir -> mem_fuse [0 ].rs1 = ir -> rs1 ;
1521+ ir -> mem_fuse [0 ].rs2 = ir -> rs2 ;
1522+ ir -> impl = dispatch_table [ir -> opcode ];
1523+ for (int j = 1 ; j < count ; j ++ ) {
1524+ next_ir = ir + j ;
1525+ ir -> mem_fuse [j ].imm = next_ir -> imm ;
1526+ ir -> mem_fuse [j ].rd = next_ir -> rd ;
1527+ ir -> mem_fuse [j ].rs1 = next_ir -> rs1 ;
1528+ ir -> mem_fuse [j ].rs2 = next_ir -> rs2 ;
1529+ next_ir -> opcode = rv_insn_empty ;
1530+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1531+ }
1532+ }
1533+ break ;
1534+ }
1535+ }
1536+ }
1537+
14101538static block_t * block_find_or_translate (riscv_t * rv , block_t * prev )
14111539{
14121540 block_map_t * map = & rv -> block_map ;
@@ -1425,6 +1553,9 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
14251553 /* translate the basic block */
14261554 block_translate (rv , next );
14271555
1556+ /* fuse instruction */
1557+ match_pattern (next );
1558+
14281559 /* insert the block into block map */
14291560 block_insert (& rv -> block_map , next );
14301561
0 commit comments