@@ -31,6 +31,7 @@ extern struct target_ops gdbstub_ops;
31
31
#include "decode.h"
32
32
#include "riscv.h"
33
33
#include "riscv_private.h"
34
+ #include "state.h"
34
35
#include "utils.h"
35
36
36
37
/* RISC-V exception code list */
@@ -1219,6 +1220,60 @@ RVOP(cswsp, {
1219
1220
})
1220
1221
#endif
1221
1222
1223
+ /* auipc + addi */
1224
+ RVOP (fuse1 , {
1225
+ rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 );
1226
+ rv -> PC += ir -> insn_len ;
1227
+ })
1228
+
1229
+ /* auipc + add */
1230
+ RVOP (fuse2 , {
1231
+ rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1232
+ rv -> PC += ir -> insn_len ;
1233
+ })
1234
+
1235
+ /* multiple sw */
1236
+ RVOP (fuse3 , {
1237
+ opcode_fuse_t * fuse = ir -> fuse ;
1238
+ uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1239
+ /* the memory addresses of the sw instructions are contiguous, so we only
1240
+ * need to check the first sw instruction to determine if its memory address
1241
+ * is misaligned or if the memory chunk does not exist.
1242
+ */
1243
+ RV_EXC_MISALIGN_HANDLER (3 , store , false, 1 );
1244
+ rv -> io .mem_write_w (rv , addr , rv -> X [fuse [0 ].rs2 ]);
1245
+ for (int i = 1 ; i < ir -> imm2 ; i ++ ) {
1246
+ addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1247
+ rv -> io .mem_write_w (rv , addr , rv -> X [fuse [i ].rs2 ]);
1248
+ }
1249
+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1250
+ })
1251
+
1252
+ /* multiple lw */
1253
+ RVOP (fuse4 , {
1254
+ opcode_fuse_t * fuse = ir -> fuse ;
1255
+ uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1256
+ /* the memory addresses of the lw instructions are contiguous, so we only
1257
+ * need to check the first lw instruction to determine if its memory address
1258
+ * is misaligned or if the memory chunk does not exist.
1259
+ */
1260
+ RV_EXC_MISALIGN_HANDLER (3 , load , false, 1 );
1261
+ rv -> X [fuse [0 ].rd ] = rv -> io .mem_read_w (rv , addr );
1262
+ for (int i = 1 ; i < ir -> imm2 ; i ++ ) {
1263
+ addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1264
+ rv -> X [fuse [i ].rd ] = rv -> io .mem_read_w (rv , addr );
1265
+ }
1266
+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1267
+ })
1268
+
1269
+ static bool do_empty (riscv_t * rv , const rv_insn_t * ir )
1270
+ {
1271
+ rv -> X [rv_reg_zero ] = 0 ;
1272
+ rv -> csr_cycle ++ ;
1273
+ const rv_insn_t * next = ir + 1 ;
1274
+ MUST_TAIL return next -> impl (rv , next );
1275
+ }
1276
+
1222
1277
static const void * dispatch_table [] = {
1223
1278
#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
1224
1279
RISCV_INSN_LIST
@@ -1337,7 +1392,6 @@ static void block_translate(riscv_t *rv, block_t *block)
1337
1392
/* compute the end of pc */
1338
1393
block -> pc_end += ir -> insn_len ;
1339
1394
block -> n_insn ++ ;
1340
-
1341
1395
/* stop on branch */
1342
1396
if (insn_is_branch (ir -> opcode )) {
1343
1397
/* recursive jump translation */
@@ -1356,6 +1410,78 @@ static void block_translate(riscv_t *rv, block_t *block)
1356
1410
block -> ir [block -> n_insn - 1 ].tailcall = true;
1357
1411
}
1358
1412
1413
/* Pack a run of adjacent lw (RW = 1) or sw (RW = 0) instructions into one
 * fused instruction (fuse4 for loads, fuse3 for stores).
 *
 * Expands to the body of a `case` in match_pattern(): it uses that function's
 * locals `block`, `i`, `ir`, `next_ir`, `count` and `sign`, and every path
 * leaves the enclosing switch via `break`.
 *
 * The instruction at ir + j joins the run only if
 *   - it has the same opcode and base register (rs1) as `ir`,
 *   - its immediate is exactly j words (4 * j bytes) away from ir->imm, in
 *     the direction fixed by the first pair, and
 *   - for loads, the previous load did not overwrite the base register;
 *     otherwise the addresses are no longer contiguous, while the fused
 *     handlers validate only the first address.
 *
 * On success the first slot becomes the fused instruction, ir->fuse holds a
 * copy of the leading opcode_fuse_t-sized part of every packed instruction,
 * ir->imm2 holds their number, and the remaining slots become rv_insn_empty.
 * If the fuse array cannot be allocated the instructions are left untouched.
 */
#define pack_memory_operation(RW)                                            \
    count = 1;                                                               \
    next_ir = ir + 1;                                                        \
    if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw))                  \
        break;                                                               \
    sign = (ir->imm - next_ir->imm) < 0 ? -1 : 1;                            \
    for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) {                   \
        next_ir = ir + j;                                                    \
        if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw) ||            \
            ir->rs1 != next_ir->rs1 ||                                       \
            ir->imm - next_ir->imm != 4 * sign * (int32_t) j ||              \
            ((RW) && (next_ir - 1)->rd == ir->rs1))                          \
            break;                                                           \
        count++;                                                             \
    }                                                                        \
    if (count > 1) {                                                         \
        opcode_fuse_t *packed = malloc((size_t) count * sizeof(*packed));    \
        if (packed) {                                                        \
            ir->opcode = IIF(RW)(rv_insn_fuse4, rv_insn_fuse3);              \
            ir->fuse = packed;                                               \
            ir->imm2 = count;                                                \
            memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));                     \
            ir->impl = dispatch_table[ir->opcode];                           \
            for (int j = 1; j < count; j++) {                                \
                next_ir = ir + j;                                            \
                memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t));        \
                next_ir->opcode = rv_insn_empty;                             \
                next_ir->impl = dispatch_table[next_ir->opcode];             \
            }                                                                \
        }                                                                    \
    }                                                                        \
    break;
1440
+
1441
+
1442
+ /* examine whether instructions in a block match a specific pattern. If so,
1443
+ * rewrite them into fused instructions. */
1444
+ static void match_pattern (block_t * block )
1445
+ {
1446
+ for (uint32_t i = 0 ; i < block -> n_insn - 1 ; i ++ ) {
1447
+ rv_insn_t * ir = block -> ir + i , * next_ir = NULL ;
1448
+ int32_t count = 0 , sign = 1 ;
1449
+ switch (ir -> opcode ) {
1450
+ case rv_insn_auipc :
1451
+ next_ir = ir + 1 ;
1452
+ if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1453
+ /* the destination register of instruction auipc is equal to the
1454
+ * source register 1 of next instruction addi */
1455
+ ir -> opcode = rv_insn_fuse1 ;
1456
+ ir -> rd = next_ir -> rd ;
1457
+ ir -> imm2 = next_ir -> imm ;
1458
+ ir -> impl = dispatch_table [ir -> opcode ];
1459
+ next_ir -> opcode = rv_insn_empty ;
1460
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1461
+ } else if (next_ir -> opcode == rv_insn_add &&
1462
+ ir -> rd == next_ir -> rs2 ) {
1463
+ /* the destination register of instruction auipc is equal to the
1464
+ * source register 2 of next instruction add */
1465
+ ir -> opcode = rv_insn_fuse2 ;
1466
+ ir -> rd = next_ir -> rd ;
1467
+ ir -> rs1 = next_ir -> rs1 ;
1468
+ ir -> impl = dispatch_table [ir -> opcode ];
1469
+ next_ir -> opcode = rv_insn_empty ;
1470
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1471
+ }
1472
+ break ;
1473
+ case rv_insn_sw :
1474
+ /* If the memory addresses of a sequence of store instructions for
1475
+ * data are contiguous, pack these instructions. */
1476
+ pack_memory_operation (0 );
1477
+ case rv_insn_lw :
1478
+ /* If the memory addresses of a sequence of load instructions for
1479
+ * data are contiguous, pack these instructions. */
1480
+ pack_memory_operation (1 );
1481
+ }
1482
+ }
1483
+ }
1484
+
1359
1485
static block_t * prev = NULL ;
1360
1486
static block_t * block_find_or_translate (riscv_t * rv )
1361
1487
{
@@ -1375,6 +1501,9 @@ static block_t *block_find_or_translate(riscv_t *rv)
1375
1501
/* translate the basic block */
1376
1502
block_translate (rv , next );
1377
1503
1504
+ /* macro operation fusion */
1505
+ match_pattern (next );
1506
+
1378
1507
/* insert the block into block map */
1379
1508
block_insert (& rv -> block_map , next );
1380
1509
0 commit comments