@@ -1251,15 +1251,38 @@ RVOP(cswsp, {
1251
1251
#endif
1252
1252
1253
1253
/* auipc + addi */
1254
- RVOP (fuse1 , { rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 ); })
1254
+ static bool do_fuse1 (riscv_t * rv , const rv_insn_t * ir )
1255
+ {
1256
+ rv -> X [rv_reg_zero ] = 0 ;
1257
+ rv -> csr_cycle += 2 ;
1258
+ rv -> X [ir -> rd ] = rv -> PC + ir -> imm ;
1259
+ rv -> X [ir -> rs1 ] = rv -> X [ir -> rd ] + ir -> imm2 ;
1260
+ rv -> PC += 2 * ir -> insn_len ;
1261
+ if (unlikely (RVOP_NO_NEXT (ir )))
1262
+ return true;
1263
+ const rv_insn_t * next = ir + 2 ;
1264
+ MUST_TAIL return next -> impl (rv , next );
1265
+ }
1255
1266
1256
1267
/* auipc + add */
1257
- RVOP (fuse2 , {
1258
- rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1259
- })
1268
+ static bool do_fuse2 (riscv_t * rv , const rv_insn_t * ir )
1269
+ {
1270
+ rv -> X [rv_reg_zero ] = 0 ;
1271
+ rv -> csr_cycle += 2 ;
1272
+ rv -> X [ir -> rd ] = rv -> PC + ir -> imm ;
1273
+ rv -> X [ir -> rs2 ] = rv -> X [ir -> rd ] + rv -> X [ir -> rs1 ];
1274
+ rv -> PC += 2 * ir -> insn_len ;
1275
+ if (unlikely (RVOP_NO_NEXT (ir )))
1276
+ return true;
1277
+ const rv_insn_t * next = ir + 2 ;
1278
+ MUST_TAIL return next -> impl (rv , next );
1279
+ }
1260
1280
1261
1281
/* multiple sw */
1262
- RVOP (fuse3 , {
1282
+ static bool do_fuse3 (riscv_t * rv , const rv_insn_t * ir )
1283
+ {
1284
+ rv -> X [rv_reg_zero ] = 0 ;
1285
+ rv -> csr_cycle += ir -> imm2 ;
1263
1286
opcode_fuse_t * fuse = ir -> fuse ;
1264
1287
uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1265
1288
/* the memory addresses of the sw instructions are contiguous, so we only
@@ -1272,10 +1295,18 @@ RVOP(fuse3, {
1272
1295
addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1273
1296
rv -> io .mem_write_w (addr , rv -> X [fuse [i ].rs2 ]);
1274
1297
}
1275
- })
1298
+ rv -> PC += ir -> imm2 * ir -> insn_len ;
1299
+ if (unlikely (RVOP_NO_NEXT (ir )))
1300
+ return true;
1301
+ const rv_insn_t * next = ir + ir -> imm2 ;
1302
+ MUST_TAIL return next -> impl (rv , next );
1303
+ }
1276
1304
1277
1305
/* multiple lw */
1278
- RVOP (fuse4 , {
1306
+ static bool do_fuse4 (riscv_t * rv , const rv_insn_t * ir )
1307
+ {
1308
+ rv -> X [rv_reg_zero ] = 0 ;
1309
+ rv -> csr_cycle += ir -> imm2 ;
1279
1310
opcode_fuse_t * fuse = ir -> fuse ;
1280
1311
uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1281
1312
/* the memory addresses of the lw instructions are contiguous, so we only
@@ -1288,7 +1319,26 @@ RVOP(fuse4, {
1288
1319
addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1289
1320
rv -> X [fuse [i ].rd ] = rv -> io .mem_read_w (addr );
1290
1321
}
1291
- })
1322
+ rv -> PC += ir -> imm2 * ir -> insn_len ;
1323
+ if (unlikely (RVOP_NO_NEXT (ir )))
1324
+ return true;
1325
+ const rv_insn_t * next = ir + ir -> imm2 ;
1326
+ MUST_TAIL return next -> impl (rv , next );
1327
+ }
1328
+
1329
+ /* lui + addi */
1330
+ static bool do_fuse5 (riscv_t * rv , const rv_insn_t * ir )
1331
+ {
1332
+ rv -> X [rv_reg_zero ] = 0 ;
1333
+ rv -> csr_cycle += 2 ;
1334
+ rv -> X [ir -> rd ] = ir -> imm ;
1335
+ rv -> X [ir -> rs1 ] = ir -> imm + ir -> imm2 ;
1336
+ rv -> PC += 2 * ir -> insn_len ;
1337
+ if (unlikely (RVOP_NO_NEXT (ir )))
1338
+ return true;
1339
+ const rv_insn_t * next = ir + 2 ;
1340
+ MUST_TAIL return next -> impl (rv , next );
1341
+ }
1292
1342
1293
1343
static const void * dispatch_table [] = {
1294
1344
#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
@@ -1448,9 +1498,8 @@ static void block_translate(riscv_t *rv, block_t *block)
1448
1498
for (int j = 1; j < count; j++) { \
1449
1499
next_ir = ir + j; \
1450
1500
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
1451
- next_ir->opcode = rv_insn_nop; \
1452
- next_ir->impl = dispatch_table[next_ir->opcode]; \
1453
1501
} \
1502
+ ir->tailcall = next_ir->tailcall; \
1454
1503
}
1455
1504
1456
1505
/* examine whether instructions in a block match a specific pattern. If so,
@@ -1469,25 +1518,32 @@ static void match_pattern(block_t *block)
1469
1518
next_ir = ir + 1 ;
1470
1519
if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1471
1520
/* the destination register of instruction auipc is equal to the
1472
- * source register 1 of next instruction addi */
1521
+ * source register 1 of next instruction addi.
1522
+ */
1473
1523
ir -> opcode = rv_insn_fuse1 ;
1474
- ir -> rd = next_ir -> rd ;
1524
+ ir -> rs1 = next_ir -> rd ;
1475
1525
ir -> imm2 = next_ir -> imm ;
1476
1526
ir -> impl = dispatch_table [ir -> opcode ];
1477
- next_ir -> opcode = rv_insn_nop ;
1478
- next_ir -> impl = dispatch_table [next_ir -> opcode ];
1527
+ ir -> tailcall = next_ir -> tailcall ;
1479
1528
} else if (next_ir -> opcode == rv_insn_add &&
1480
1529
ir -> rd == next_ir -> rs2 ) {
1481
1530
/* the destination register of instruction auipc is equal to the
1482
1531
* source register 2 of next instruction add */
1483
1532
ir -> opcode = rv_insn_fuse2 ;
1484
- ir -> rd = next_ir -> rd ;
1533
+ ir -> rs2 = next_ir -> rd ;
1485
1534
ir -> rs1 = next_ir -> rs1 ;
1486
1535
ir -> impl = dispatch_table [ir -> opcode ];
1487
- next_ir -> opcode = rv_insn_nop ;
1488
- next_ir -> impl = dispatch_table [next_ir -> opcode ];
1536
+ } else if (next_ir -> opcode == rv_insn_add &&
1537
+ ir -> rd == next_ir -> rs1 ) {
1538
+ /* the destination register of instruction auipc is equal to the
1539
+ * source register 1 of next instruction add */
1540
+ ir -> opcode = rv_insn_fuse2 ;
1541
+ ir -> rs2 = next_ir -> rd ;
1542
+ ir -> rs1 = next_ir -> rs2 ;
1543
+ ir -> impl = dispatch_table [ir -> opcode ];
1489
1544
}
1490
1545
break ;
1546
+
1491
1547
/* If the memory addresses of a sequence of store or load instructions
1492
1548
* are contiguous, combine these instructions.
1493
1549
*/
@@ -1497,7 +1553,19 @@ static void match_pattern(block_t *block)
1497
1553
case rv_insn_lw :
1498
1554
COMBINE_MEM_OPS (1 );
1499
1555
break ;
1500
- /* FIXME: lui + addi */
1556
+ case rv_insn_lui :
1557
+ next_ir = ir + 1 ;
1558
+ if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1559
+ /* the destination register of instruction lui is equal to
1560
+ * the source register 1 of next instruction addi.
1561
+ */
1562
+ ir -> opcode = rv_insn_fuse5 ;
1563
+ ir -> rs1 = next_ir -> rd ;
1564
+ ir -> imm2 = next_ir -> imm ;
1565
+ ir -> impl = dispatch_table [ir -> opcode ];
1566
+ ir -> tailcall = next_ir -> tailcall ;
1567
+ }
1568
+ break ;
1501
1569
/* TODO: mixture of sw and lw */
1502
1570
/* TODO: reorder instructions to match pattern */
1503
1571
}
0 commit comments