from amaranth import *
from amaranth.lib.data import View
import amaranth.lib.memory as memory
from amaranth.utils import exact_log2

from transactron.core import Priority, TModule
from transactron import Method, def_method, Transaction
from coreblocks.params import DCacheParameters
from transactron.utils import assign
from transactron.lib import *
from transactron.lib import logging
from transactron.lib.simultaneous import condition

from coreblocks.cache.iface import CacheInterface, DataCacheRefillerInterface
from coreblocks.peripherals.bus_adapter import BusMasterInterface
from transactron.utils.transactron_helpers import make_layout

__all__ = [
    "DCache",
    "DCacheBypass",
]

from coreblocks.interface.layouts import DCacheLayouts

log = logging.HardwareLogger("backend.dcache")


class DCacheBypass(Elaboratable, CacheInterface):
    """Uncached data path: forwards every request directly to the bus.

    Used when the D-cache is disabled and as the MMIO path. Requests
    complete in order; a FIFO of request kinds matches each response to
    the bus channel (read/write) it must be collected from.
    """

    def __init__(self, layouts: DCacheLayouts, params: DCacheParameters, bus_master: BusMasterInterface) -> None:
        self.layouts = layouts
        self.params = params
        self.bus_master = bus_master

        self.issue_req = Method(i=layouts.issue_req)
        self.accept_res = Method(o=layouts.accept_res)
        self.flush = Method()

        if params.word_width != bus_master.params.data_width:
            raise ValueError("Data cache bypass word width must match bus data width.")
        if bus_master.params.granularity != 8:
            raise ValueError("Data cache bypass expects byte-granular bus selects.")

    def elaborate(self, platform):
        m = TModule()

        # Remembers, per in-flight request, whether it was a store, so that
        # accept_res waits on the matching bus response channel.
        m.submodules.store_fifo = store_fifo = BasicFifo([("store", 1)], self.params.request_depth)

        @def_method(m, self.issue_req)
        def _(addr: Value, data: Value, byte_mask: Value, store: Value):
            # The bus is word-addressed; drop the byte offset bits.
            bus_addr = addr >> exact_log2(self.params.word_width_bytes)

            with condition(m) as branch:
                with branch(store):
                    self.bus_master.request_write(m, addr=bus_addr, data=data, sel=byte_mask)
                with branch():
                    self.bus_master.request_read(m, addr=bus_addr, sel=byte_mask)

            store_fifo.write(m, store=store)

        @def_method(m, self.accept_res)
        def _():
            request = store_fifo.read(m)
            data = Signal(self.params.word_width)
            error = Signal()

            with condition(m) as branch:
                with branch(request.store):
                    res = self.bus_master.get_write_response(m)
                    m.d.comb += error.eq(res.err)
                with branch():
                    res = self.bus_master.get_read_response(m)
                    m.d.comb += [
                        data.eq(res.data),
                        error.eq(res.err),
                    ]

            return {"data": data, "error": error}

        @def_method(m, self.flush)
        def _() -> None:
            # Nothing is cached on this path — flush is a no-op.
            pass

        return m


class DCache(Elaboratable, CacheInterface):
    """A simple write-back data cache

    Single-core design — no coherence protocol.

    Replacement policy: pseudo-random round-robin (same as ICache).
    """

    def __init__(
        self, layouts: DCacheLayouts, params: DCacheParameters, refiller: DataCacheRefillerInterface
    ) -> None:
        """
        Parameters
        ----------
        layouts : DCacheLayouts
            Layouts for D-cache methods.
        params : DCacheParameters
            Cache geometry and configuration.
        refiller : DataCacheRefillerInterface
            Refiller with writeback support for dirty line eviction.
        """
        self.layouts = layouts
        self.params = params
        self.refiller = refiller

        # Methods
        self.issue_req = Method(i=layouts.issue_req)
        self.accept_res = Method(o=layouts.accept_res)
        self.issue_req.add_conflict(self.accept_res, Priority.LEFT)

        self.flush = Method()
        self.flush.add_conflict(self.issue_req, Priority.LEFT)

        # Method called by refiller AFTER cache started writeback (called start_writeback method in refiller)
        self.provide_writeback_data = Method(o=layouts.provide_writeback_data)

        self.addr_layout = make_layout(
            ("offset", self.params.offset_bits),
            ("index", self.params.index_bits),
            ("tag", self.params.tag_bits),
        )

        self.perf_loads = HwCounter("backend.dcache.loads")
        self.perf_stores = HwCounter("backend.dcache.stores")
        self.perf_hits = HwCounter("backend.dcache.hits")
        self.perf_misses = HwCounter("backend.dcache.misses")
        self.perf_writebacks = HwCounter("backend.dcache.writebacks")
        self.perf_errors = HwCounter("backend.dcache.errors")
        self.perf_flushes = HwCounter("backend.dcache.flushes")

    def deserialize_addr(self, raw_addr: Value) -> dict[str, Value]:
        """Split a flat byte address into offset/index/tag fields."""
        return {
            "offset": raw_addr[: self.params.offset_bits],
            "index": raw_addr[self.params.index_start_bit : self.params.index_end_bit + 1],
            "tag": raw_addr[-self.params.tag_bits :],
        }

    def serialize_addr(self, addr: View) -> Value:
        """Inverse of deserialize_addr: rebuild the flat byte address."""
        return Cat(addr.offset, addr.index, addr.tag)

    def elaborate(self, platform):
        m = TModule()

        m.submodules += [
            self.perf_loads,
            self.perf_stores,
            self.perf_hits,
            self.perf_misses,
            self.perf_writebacks,
            self.perf_errors,
            self.perf_flushes,
        ]

        m.submodules.mem = self.mem = DCacheMemory(self.params)
        m.submodules.req_fifo = req_fifo = BasicFifo(self.layouts.issue_req, self.params.request_depth)
        m.submodules.res_fifo = res_fifo = BasicFifo(self.layouts.accept_res, self.params.request_depth)

        rr_way = Signal(range(self.params.num_of_ways))  # Round-robin state
        rr_used = Signal()

        outstanding = Signal(range(self.params.request_depth + 1))

        flush_start = Signal()
        flush_finish = Signal()
        needs_writeback = Signal()  # if we missed and victim is dirty, writeback the victim and load new line
        needs_refill = Signal()
        refill_finish = Signal()
        writeback_done_error = Signal()
        writeback_done_flush = Signal()
        writeback_done_refill = Signal()

        wb_way = Signal(range(self.params.num_of_ways))
        wb_index = Signal(self.params.index_bits)
        wb_word_counter = Signal(range(self.params.words_in_line))

        wb_triggered_by_flush = Signal()
        flush_writeback_pending = Signal()

        # Request/response communication (lookup)
        pending_req = Signal(self.layouts.issue_req)
        pending_req_valid = Signal()
        lookup_addr = Signal(self.addr_layout)
        lookup_valid = Signal()

        # Refill state
        refill_addr = Signal(self.addr_layout)
        refill_way = Signal(range(self.params.num_of_ways))
        refill_error = Signal()

        # Starts in FLUSH so that all tag entries are invalidated after reset.
        with m.FSM(init="FLUSH") as fsm:
            with m.State("FLUSH"):
                with m.If(flush_writeback_pending):
                    m.next = "WRITEBACK"
                with m.Elif(flush_finish):
                    m.next = "LOOKUP"

            with m.State("LOOKUP"):
                with m.If(flush_start):
                    m.next = "FLUSH"
                with m.Elif(needs_writeback):
                    m.next = "WRITEBACK"
                with m.Elif(needs_refill):
                    m.next = "REFILL"

            with m.State("REFILL"):
                with m.If(refill_finish):
                    m.next = "LOOKUP"

            with m.State("WRITEBACK"):
                with m.If(writeback_done_error):
                    with m.If(wb_triggered_by_flush):
                        m.d.sync += [
                            wb_triggered_by_flush.eq(0),
                            flush_writeback_pending.eq(0),
                        ]
                    # On a writeback bus error the flush/refill is abandoned.
                    m.next = "LOOKUP"
                with m.Elif(writeback_done_flush):
                    m.d.sync += [
                        wb_triggered_by_flush.eq(0),
                        flush_writeback_pending.eq(0),
                    ]
                    m.next = "FLUSH"
                with m.Elif(writeback_done_refill):
                    m.next = "REFILL"

        # One-cycle wait for the tag/data SRAM read of the pending request.
        with m.If(fsm.ongoing("LOOKUP") & pending_req_valid & ~lookup_valid):
            m.d.sync += lookup_valid.eq(1)

        with Transaction(name="StartLookupFromQueue").body(
            m, ready=fsm.ongoing("LOOKUP") & ~pending_req_valid & ~lookup_valid
        ):
            req = req_fifo.read(m)
            deserialized = self.deserialize_addr(req.addr)
            m.d.sync += [
                pending_req.addr.eq(req.addr),
                pending_req.data.eq(req.data),
                pending_req.byte_mask.eq(req.byte_mask),
                pending_req.store.eq(req.store),
                pending_req_valid.eq(1),
                assign(lookup_addr, deserialized),
                lookup_valid.eq(0),
            ]

        # ------------- FLUSH -------
        # Iterates through all sets, checks dirty bits, starts writeback if needed, then invalidates.
        # 2 cycles per set: cycle 1 = SRAM read (wait), cycle 2 = process tag_rd_data.
        # TODO: optimize to 1 cycle per set

        flush_index = Signal(self.params.index_bits)
        flush_data_valid = Signal()  # high when tag_rd_data is valid for current flush_index

        # SRAM read latency: wait 1 cycle for data
        with m.If(fsm.ongoing("FLUSH") & ~flush_data_valid):
            m.d.sync += flush_data_valid.eq(1)

        with m.If(~fsm.ongoing("FLUSH")):
            m.d.sync += flush_data_valid.eq(0)

        # NOTE: tag_rd_index is driven from flush_index by the consolidated
        # memory-connection mux at the bottom of elaborate (single driver).

        @def_method(
            m,
            self.flush,
            ready=fsm.ongoing("LOOKUP") & (outstanding == 0) & ~pending_req_valid & ~lookup_valid,
        )
        def _():
            log.info(m, True, "Flushing the cache...")
            self.perf_flushes.incr(m)
            m.d.sync += flush_index.eq(0)
            m.d.sync += flush_data_valid.eq(0)
            m.d.comb += flush_start.eq(1)

        with Transaction(name="Flush").body(m, ready=fsm.ongoing("FLUSH") & flush_data_valid):
            # tag_rd_data is valid for current flush_index

            any_dirty = Signal()
            dirty_way = Signal(range(self.params.num_of_ways))

            for i in range(self.params.num_of_ways):
                tag_data = self.mem.tag_rd_data[i]
                with m.If(tag_data.valid & tag_data.dirty):
                    m.d.comb += any_dirty.eq(1)
                    m.d.comb += dirty_way.eq(i)

            with m.If(any_dirty):
                # Writeback the dirty way, then come back to re-check this set
                wb_addr = Signal(self.addr_layout)
                m.d.comb += [
                    wb_addr.offset.eq(0),
                    wb_addr.index.eq(flush_index),
                    wb_addr.tag.eq(self.mem.tag_rd_data[dirty_way].tag),
                ]
                self.perf_writebacks.incr(m)
                self.refiller.start_writeback(m, addr=self.serialize_addr(wb_addr))
                m.d.sync += [
                    wb_way.eq(dirty_way),
                    wb_index.eq(flush_index),
                    wb_word_counter.eq(0),
                    wb_triggered_by_flush.eq(1),
                    flush_writeback_pending.eq(1),
                ]
            with m.Else():
                # No dirty ways — invalidate all ways at this set
                m.d.comb += [
                    self.mem.way_wr_en.eq(C(1).replicate(self.params.num_of_ways)),
                    self.mem.tag_wr_index.eq(flush_index),
                    self.mem.tag_wr_data.valid.eq(0),
                    self.mem.tag_wr_data.dirty.eq(0),
                    self.mem.tag_wr_data.tag.eq(0),
                    self.mem.tag_wr_en.eq(1),
                ]

                # Only advance the index once the set is fully clean; a dirty
                # set is re-checked after its writeback completes.
                with m.If(flush_index == self.params.num_of_sets - 1):
                    m.d.comb += flush_finish.eq(1)
                with m.Else():
                    m.d.sync += flush_index.eq(flush_index + 1)
                m.d.sync += flush_data_valid.eq(0)

        # ------------------ WRITEBACK ---
        # data_rd_addr is driven from wb_index/wb_word_counter by the
        # consolidated memory-connection mux at the bottom of elaborate.

        @def_method(m, self.provide_writeback_data, ready=fsm.ongoing("WRITEBACK"))
        def _():
            # NOTE(review): relies on the refiller alternating REQ/RESP states,
            # which leaves >=1 cycle of SRAM read latency between beats — confirm.
            m.d.sync += wb_word_counter.eq(wb_word_counter + 1)
            return {"data": self.mem.data_rd_data[wb_way]}

        # End the writeback
        # Runs if FSM is WRITEBACK and refiller.accept_writeback is ready
        with Transaction(name="WritebackEnd").body(m, ready=fsm.ongoing("WRITEBACK")):
            result = self.refiller.accept_writeback(m)
            self.perf_errors.incr(m, enable_call=result.error)

            with m.If(~result.error):
                # After a successful writeback, the victim line can be invalidated.
                m.d.comb += [
                    self.mem.way_wr_en.eq(1 << wb_way),
                    self.mem.tag_wr_index.eq(wb_index),
                    self.mem.tag_wr_data.valid.eq(0),
                    self.mem.tag_wr_data.dirty.eq(0),
                    self.mem.tag_wr_data.tag.eq(0),
                    self.mem.tag_wr_en.eq(1),
                ]

                with m.If(wb_triggered_by_flush):
                    m.d.comb += writeback_done_flush.eq(1)
                with m.Else():
                    self.refiller.start_refill(m, addr=self.serialize_addr(refill_addr))
                    m.d.comb += writeback_done_refill.eq(1)

            with m.Else():
                m.d.comb += writeback_done_error.eq(1)
                with m.If(~wb_triggered_by_flush):
                    # The miss that caused this writeback gets an error response.
                    res_fifo.write(m, data=0, error=1)
                    m.d.sync += [
                        pending_req_valid.eq(0),
                        lookup_valid.eq(0),
                        rr_used.eq(0),
                    ]

        # Writeback is started either by lookup or flush

        # ---------- LOOKUP ----

        with Transaction(name="Lookup").body(m, ready=fsm.ongoing("LOOKUP") & pending_req_valid & lookup_valid):
            # If cache hit: set dirty bit if store -> return data
            # If cache miss: if victim has dirty bit, writeback cache line -> refill -> return data
            tag_hit = Array(
                self.mem.tag_rd_data[i].valid & (self.mem.tag_rd_data[i].tag == lookup_addr.tag)
                for i in range(self.params.num_of_ways)
            )

            tag_hit_any = Signal()
            m.d.comb += tag_hit_any.eq(Cat(tag_hit).any())

            hit_way = Signal(range(self.params.num_of_ways))
            load_data = Signal(self.params.word_width)

            # TODO: optimize that
            for i in range(self.params.num_of_ways):
                with m.If(tag_hit[i]):
                    m.d.comb += [
                        hit_way.eq(i),
                        load_data.eq(self.mem.data_rd_data[i]),
                    ]

            with m.If(tag_hit_any):
                self.perf_hits.incr(m)
                response_data = Signal(self.params.word_width)
                m.d.comb += response_data.eq(load_data)
                with m.If(pending_req.store):
                    m.d.comb += [
                        self.mem.way_wr_en.eq(1 << hit_way),
                        self.mem.data_wr_en.eq(1),
                        self.mem.data_wr_addr.index.eq(lookup_addr.index),
                        self.mem.data_wr_addr.offset.eq(lookup_addr.offset),
                        self.mem.data_wr_data.eq(pending_req.data),
                        self.mem.data_wr_mask.eq(pending_req.byte_mask),
                        self.mem.tag_wr_index.eq(lookup_addr.index),
                        self.mem.tag_wr_data.valid.eq(1),
                        self.mem.tag_wr_data.dirty.eq(1),
                        self.mem.tag_wr_data.tag.eq(lookup_addr.tag),
                        self.mem.tag_wr_en.eq(1),
                    ]
                    m.d.comb += response_data.eq(0)

                res_fifo.write(m, data=response_data, error=0)

                m.d.sync += [
                    pending_req_valid.eq(0),
                    lookup_valid.eq(0),
                ]

            with m.Else():
                self.perf_misses.incr(m)
                # we check if dirty, if yes, change FSM to writeback
                # then, we refill
                # then, lookup transaction starts again, now with proper refilled cache line

                # 1. Choose way determined by round-robin
                victim_way = Signal(range(self.params.num_of_ways))
                victim_used_rr = Signal()
                m.d.comb += [victim_way.eq(rr_way), victim_used_rr.eq(1)]

                # 2. If there are already some invalid ways, use them instead of round-robin
                for i in reversed(range(self.params.num_of_ways)):
                    with m.If(~self.mem.tag_rd_data[i].valid):
                        m.d.comb += [victim_way.eq(i), victim_used_rr.eq(0)]

                m.d.sync += [rr_used.eq(victim_used_rr)]

                victim_tag_data = self.mem.tag_rd_data[victim_way]
                victim_addr = Signal(self.addr_layout)
                m.d.comb += [
                    victim_addr.offset.eq(0),
                    victim_addr.index.eq(lookup_addr.index),
                    victim_addr.tag.eq(victim_tag_data.tag),
                ]

                aligned_refill_addr = self.serialize_addr(lookup_addr) & ~((1 << self.params.offset_bits) - 1)

                with m.If(victim_tag_data.valid & victim_tag_data.dirty):
                    # Writeback, then Refill
                    self.perf_writebacks.incr(m)
                    self.refiller.start_writeback(m, addr=self.serialize_addr(victim_addr))
                    m.d.comb += needs_writeback.eq(1)
                    m.d.sync += [
                        wb_way.eq(victim_way),
                        wb_index.eq(lookup_addr.index),
                        wb_word_counter.eq(0),
                        wb_triggered_by_flush.eq(0),
                        refill_addr.offset.eq(0),
                        refill_addr.index.eq(lookup_addr.index),
                        refill_addr.tag.eq(lookup_addr.tag),
                        refill_way.eq(victim_way),
                        refill_error.eq(0),
                        lookup_valid.eq(0),
                    ]
                with m.Else():
                    # Refill
                    self.refiller.start_refill(m, addr=aligned_refill_addr)
                    m.d.comb += needs_refill.eq(1)
                    m.d.sync += [
                        refill_addr.offset.eq(0),
                        refill_addr.index.eq(lookup_addr.index),
                        refill_addr.tag.eq(lookup_addr.tag),
                        refill_way.eq(victim_way),
                        refill_error.eq(0),
                        lookup_valid.eq(0),
                    ]

        # ------------- REFILL ---------
        with Transaction(name="Refill").body(m, ready=fsm.ongoing("REFILL")):
            ret = self.refiller.accept_refill(m)
            deserialized = self.deserialize_addr(ret.addr)
            refill_error_now = Signal()
            m.d.comb += refill_error_now.eq(refill_error | ret.error)

            with m.If(~ret.error):
                m.d.comb += [
                    self.mem.way_wr_en.eq(1 << refill_way),
                    self.mem.data_wr_en.eq(1),
                    self.mem.data_wr_addr.index.eq(deserialized["index"]),
                    self.mem.data_wr_addr.offset.eq(deserialized["offset"]),
                    self.mem.data_wr_data.eq(ret.data),
                    self.mem.data_wr_mask.eq((1 << self.params.word_width_bytes) - 1),
                ]

            with m.If(ret.error):
                m.d.sync += refill_error.eq(1)

            with m.If(ret.last):
                m.d.comb += refill_finish.eq(1)
                with m.If(~refill_error_now):
                    # Whole line landed without error — publish the new tag.
                    m.d.comb += [
                        self.mem.way_wr_en.eq(1 << refill_way),
                        self.mem.tag_wr_index.eq(refill_addr.index),
                        self.mem.tag_wr_data.valid.eq(1),
                        self.mem.tag_wr_data.dirty.eq(0),
                        self.mem.tag_wr_data.tag.eq(refill_addr.tag),
                        self.mem.tag_wr_en.eq(1),
                    ]
                    m.d.sync += [
                        lookup_valid.eq(0),
                        refill_error.eq(0),
                        rr_used.eq(0),
                    ]
                    # Advance round-robin only if the victim actually came from it.
                    with m.If(rr_used):
                        m.d.sync += [
                            rr_way.eq(Mux(refill_way == self.params.num_of_ways - 1, 0, refill_way + 1)),
                        ]

                with m.Else():
                    res_fifo.write(m, data=0, error=1)
                    m.d.sync += [
                        pending_req_valid.eq(0),
                        lookup_valid.eq(0),
                        refill_error.eq(0),
                        rr_used.eq(0),
                    ]

        # ------ Methods ---
        @def_method(m, self.accept_res)
        def _():
            m.d.sync += outstanding.eq(outstanding - 1)
            return res_fifo.read(m)

        @def_method(
            m,
            self.issue_req,
            ready=outstanding != self.params.request_depth,
        )
        def _(addr: Value, data: Value, byte_mask: Value, store: Value):
            with m.If(store):
                self.perf_stores.incr(m)
            with m.Else():
                self.perf_loads.incr(m)

            req_fifo.write(m, addr=addr, data=data, byte_mask=byte_mask, store=store)
            m.d.sync += outstanding.eq(outstanding + 1)

        # Connection to memory — single consolidated read-address mux
        # (the only driver of tag_rd_index / data_rd_addr).
        with m.If(fsm.ongoing("FLUSH")):
            m.d.comb += self.mem.tag_rd_index.eq(flush_index)
        with m.Elif(fsm.ongoing("WRITEBACK")):
            m.d.comb += [
                self.mem.data_rd_addr.index.eq(wb_index),
                self.mem.data_rd_addr.offset.eq(wb_word_counter << exact_log2(self.params.word_width_bytes)),
            ]
        with m.Else():
            m.d.comb += [
                self.mem.tag_rd_index.eq(lookup_addr.index),
                self.mem.data_rd_addr.index.eq(lookup_addr.index),
                self.mem.data_rd_addr.offset.eq(lookup_addr.offset),
            ]

        return m


class DCacheMemory(Elaboratable):
    """A helper module for managing memories used in the data cache.

    Extends the ICache memory design with:
    - A dirty bit in the tag array (for write-back policy).
    - Byte-granularity write enables on the data array (for sb/sh/sw).
    """

    def __init__(self, params: DCacheParameters) -> None:
        self.params = params

        # Dirty bit - if set, it means cache was modified since loading from ram; we need to save the cache on flush
        self.tag_data_layout = make_layout(("valid", 1), ("dirty", 1), ("tag", self.params.tag_bits))

        # One-hot per-way write enable, shared by tag and data writes.
        self.way_wr_en = Signal(self.params.num_of_ways)

        self.tag_rd_index = Signal(self.params.index_bits)
        self.tag_rd_data = Array([Signal(self.tag_data_layout) for _ in range(self.params.num_of_ways)])
        self.tag_wr_index = Signal(self.params.index_bits)
        self.tag_wr_en = Signal()
        self.tag_wr_data = Signal(self.tag_data_layout)

        self.data_addr_layout = make_layout(("index", self.params.index_bits), ("offset", self.params.offset_bits))

        self.word_bits = params.word_width
        self.word_bytes = params.word_width // 8

        self.data_rd_addr = Signal(self.data_addr_layout)
        self.data_rd_data = Array([Signal(self.word_bits) for _ in range(self.params.num_of_ways)])

        self.data_wr_addr = Signal(self.data_addr_layout)
        self.data_wr_en = Signal()
        self.data_wr_data = Signal(self.word_bits)

        self.data_wr_mask = Signal(self.word_bytes)  # byte-granularity write mask
        self.tag_mems: list[memory.Memory] = []
        self.data_mems: list[memory.Memory] = []

    def elaborate(self, platform):
        m = TModule()

        for i in range(self.params.num_of_ways):
            way_wr = self.way_wr_en[i]

            tag_mem = memory.Memory(shape=self.tag_data_layout, depth=self.params.num_of_sets, init=[])
            self.tag_mems.append(tag_mem)

            tag_mem_wp = tag_mem.write_port()
            # Transparent read: a write in the same cycle is visible to the reader.
            tag_mem_rp = tag_mem.read_port(transparent_for=[tag_mem_wp])
            m.submodules[f"tag_mem_{i}"] = tag_mem

            m.d.comb += [
                assign(self.tag_rd_data[i], tag_mem_rp.data),
                tag_mem_rp.addr.eq(self.tag_rd_index),
                tag_mem_wp.addr.eq(self.tag_wr_index),
                assign(tag_mem_wp.data, self.tag_wr_data),
                tag_mem_wp.en.eq(self.tag_wr_en & way_wr),
            ]

            data_mem = memory.Memory(
                shape=self.word_bits,
                depth=self.params.num_of_sets * self.params.words_in_line,
                init=[],
            )
            self.data_mems.append(data_mem)
            data_mem_wp = data_mem.write_port(granularity=8)
            data_mem_rp = data_mem.read_port(transparent_for=[data_mem_wp])
            m.submodules[f"data_mem_{i}"] = data_mem

            # The memories are word-addressed; drop the byte offset bits.
            word_bytes_log = exact_log2(self.word_bytes)
            rd_addr = Cat(self.data_rd_addr.offset, self.data_rd_addr.index)[word_bytes_log:]
            wr_addr = Cat(self.data_wr_addr.offset, self.data_wr_addr.index)[word_bytes_log:]

            m.d.comb += [
                self.data_rd_data[i].eq(data_mem_rp.data),
                data_mem_rp.addr.eq(rd_addr),
                data_mem_wp.addr.eq(wr_addr),
                Value.cast(data_mem_wp.data).eq(self.data_wr_data),
                data_mem_wp.en.eq(Mux(self.data_wr_en & way_wr, self.data_wr_mask, 0)),
            ]

        return m
# --- coreblocks/cache/iface.py (addition) ---

class DataCacheRefillerInterface(CacheRefillerInterface):
    """
    Data Cache Refiller Interface.

    Extends the plain cache refiller with a writeback channel so that a
    write-back cache can spill dirty lines before refilling them.

    Parameters
    ----------
    start_refill: Method
        A method that is used to start a refill for a given cache line.
    accept_refill: Method
        A method that is used to accept one fetch block from the requested cache line.
    start_writeback: Method
        Writes dirty data from cache to memory.
    accept_writeback: Method
        Accepts writeback result (a single accumulated error flag).
    """

    start_writeback: Method
    accept_writeback: Method


# --- coreblocks/cache/refiller.py (addition) ---

class SimpleCommonBusDataCacheRefiller(Elaboratable, DataCacheRefillerInterface):
    """Word-at-a-time refill/writeback engine over the common bus.

    A single FSM serializes line transfers: only one refill OR writeback is
    in flight at a time, alternating request/response states per bus word.
    """

    def __init__(self, layouts: DCacheLayouts, params: DCacheParameters, bus_master: BusMasterInterface):
        if params.word_width != bus_master.params.data_width:
            raise ValueError("Data cache word width must match bus data width.")
        if bus_master.params.granularity != 8:
            raise ValueError("Data cache refiller expects byte-granular bus selects.")

        self.layouts = layouts
        self.params = params
        self.bus_master = bus_master

        self.start_refill = Method(i=layouts.start_refill)
        self.accept_refill = Method(o=layouts.accept_refill)

        self.start_writeback = Method(i=layouts.start_writeback)
        self.accept_writeback = Method(o=layouts.accept_writeback)
        # Provided by the cache (DCache.provide_writeback_data): pulls one
        # data word per writeback beat.
        self.get_writeback_data = Method(o=layouts.provide_writeback_data)

    def elaborate(self, platform):
        m = TModule()

        m.submodules.refill_resp_fwd = refill_resp_fwd = Forwarder(self.layouts.accept_refill)
        m.submodules.writeback_resp_fwd = writeback_resp_fwd = Forwarder(self.layouts.accept_writeback)

        line_addr = Signal(self.params.addr_width - self.params.offset_bits)
        word_idx = Signal(range(self.params.words_in_line))
        writeback_error = Signal()  # sticky OR of per-beat bus errors

        word_bytes_log = exact_log2(self.params.word_width_bytes)
        full_sel = C(1).replicate(self.bus_master.params.data_width // self.bus_master.params.granularity)
        bus_word_addr = Cat(word_idx, line_addr)  # word-granular bus address
        byte_word_addr = Cat(C(0, word_bytes_log), word_idx, line_addr)  # byte address reported to cache
        last_word = word_idx == self.params.words_in_line - 1

        start_refill_req = Signal()
        start_writeback_req = Signal()
        bus_read_request_done = Signal()
        bus_read_done = Signal()
        bus_read_error = Signal()
        bus_write_request_done = Signal()
        bus_write_done = Signal()

        with m.FSM(init="IDLE") as fsm:
            with m.State("IDLE"):
                with m.If(start_refill_req):
                    m.next = "REFILL_REQ"
                with m.Elif(start_writeback_req):
                    m.next = "WRITEBACK_REQ"

            with m.State("REFILL_REQ"):
                with m.If(bus_read_request_done):
                    m.next = "REFILL_RESP"

            with m.State("REFILL_RESP"):
                with m.If(bus_read_done):
                    # A refill aborts on the first bus error.
                    with m.If(bus_read_error | last_word):
                        m.next = "IDLE"
                    with m.Else():
                        m.next = "REFILL_REQ"

            with m.State("WRITEBACK_REQ"):
                with m.If(bus_write_request_done):
                    m.next = "WRITEBACK_RESP"

            with m.State("WRITEBACK_RESP"):
                # NOTE(review): unlike refill, a writeback keeps streaming after
                # a bus error and only accumulates it — confirm this asymmetry
                # is intended.
                with m.If(bus_write_done):
                    with m.If(last_word):
                        m.next = "IDLE"
                    with m.Else():
                        m.next = "WRITEBACK_REQ"

        with Transaction(name="DCacheRefillRequest").body(m, ready=fsm.ongoing("REFILL_REQ")):
            self.bus_master.request_read(
                m,
                addr=bus_word_addr,
                sel=full_sel,
            )
            m.d.comb += bus_read_request_done.eq(1)

        with Transaction(name="DCacheRefillResponse").body(m, ready=fsm.ongoing("REFILL_RESP")):
            bus_response = self.bus_master.get_read_response(m)
            m.d.comb += [
                bus_read_done.eq(1),
                bus_read_error.eq(bus_response.err),
            ]
            refill_resp_fwd.write(
                m,
                addr=byte_word_addr,
                data=bus_response.data,
                error=bus_response.err,
                last=bus_response.err | last_word,
            )

            with m.If(~bus_response.err & ~last_word):
                m.d.sync += word_idx.eq(word_idx + 1)

        with Transaction(name="DCacheWritebackRequest").body(m, ready=fsm.ongoing("WRITEBACK_REQ")):
            data = self.get_writeback_data(m)
            self.bus_master.request_write(
                m,
                addr=bus_word_addr,
                data=data.data,
                sel=full_sel,
            )
            m.d.comb += bus_write_request_done.eq(1)

        with Transaction(name="DCacheWritebackResponse").body(m, ready=fsm.ongoing("WRITEBACK_RESP")):
            bus_response = self.bus_master.get_write_response(m)
            m.d.comb += bus_write_done.eq(1)

            with m.If(last_word):
                writeback_resp_fwd.write(m, error=writeback_error | bus_response.err)
            with m.Else():
                m.d.sync += [
                    writeback_error.eq(writeback_error | bus_response.err),
                    word_idx.eq(word_idx + 1),
                ]

        @def_method(m, self.start_refill, ready=fsm.ongoing("IDLE"))
        def _(addr) -> None:
            m.d.comb += start_refill_req.eq(1)
            m.d.sync += [
                line_addr.eq(addr[self.params.offset_bits :]),
                word_idx.eq(0),
            ]

        @def_method(m, self.accept_refill)
        def _():
            return refill_resp_fwd.read(m)

        @def_method(m, self.start_writeback, ready=fsm.ongoing("IDLE"))
        def _(addr) -> None:
            m.d.comb += start_writeback_req.eq(1)
            m.d.sync += [
                line_addr.eq(addr[self.params.offset_bits :]),
                word_idx.eq(0),
                writeback_error.eq(0),
            ]

        @def_method(m, self.accept_writeback)
        def _():
            return writeback_resp_fwd.read(m)

        return m


# --- coreblocks/func_blocks/fu/lsu/dummyLsu.py (addition) ---
# (requires: BasicFifo from transactron.lib, DCache/DCacheBypass,
#  SimpleCommonBusDataCacheRefiller, DCacheLayouts — added to that file's imports)

class LSUDataPathRouter(Elaboratable, CacheInterface):
    """Routes LSU memory requests between the cached path and the MMIO path.

    The PMA checker decides per request; the decision is queued so that each
    response is accepted from the same path its request was issued to.
    """

    def __init__(self, gen_params: GenParams, cached_data_path: CacheInterface, mmio_data_path: CacheInterface) -> None:
        self.gen_params = gen_params
        # Fetch the layouts once instead of re-querying gen_params later.
        self.layouts = gen_params.get(DCacheLayouts)

        self.cached_data_path = cached_data_path
        self.mmio_data_path = mmio_data_path

        self.issue_req = Method(i=self.layouts.issue_req)
        self.accept_res = Method(o=self.layouts.accept_res)
        self.flush = Method()

    def elaborate(self, platform):
        m = TModule()

        m.submodules.pma_checker = pma_checker = PMAChecker(self.gen_params)
        # Remembers, per in-flight request, which path it was routed to.
        m.submodules.mmio_fifo = mmio_fifo = BasicFifo([("mmio", 1)], self.gen_params.dcache_params.request_depth)

        @def_method(m, self.issue_req)
        def _(addr: Value, data: Value, byte_mask: Value, store: Value):
            m.d.av_comb += pma_checker.addr.eq(addr)

            with condition(m) as branch:
                with branch(pma_checker.result["mmio"]):
                    self.mmio_data_path.issue_req(m, addr=addr, data=data, byte_mask=byte_mask, store=store)
                with branch():
                    self.cached_data_path.issue_req(m, addr=addr, data=data, byte_mask=byte_mask, store=store)

            mmio_fifo.write(m, mmio=pma_checker.result["mmio"])

        @def_method(m, self.accept_res)
        def _():
            route = mmio_fifo.read(m)
            response = Signal(self.layouts.accept_res)

            with condition(m) as branch:
                with branch(route.mmio):
                    m.d.comb += response.eq(self.mmio_data_path.accept_res(m))
                with branch():
                    m.d.comb += response.eq(self.cached_data_path.accept_res(m))

            return response

        @def_method(m, self.flush)
        def _() -> None:
            self.cached_data_path.flush(m)
            self.mmio_data_path.flush(m)

        return m
@@ -77,7 +132,28 @@ def elaborate(self, platform): csr = self.dependency_manager.get_dependency(CSRInstancesKey()) m.submodules.pma_checker = pma_checker = PMAChecker(self.gen_params) m.submodules.pmp_checker = pmp_checker = PMPChecker(self.gen_params, csr.m_mode) - m.submodules.requester = requester = LSURequester(self.gen_params, self.bus) + + dcache_layouts = self.gen_params.get(DCacheLayouts) + if self.gen_params.dcache_params.enable: + m.submodules.dcache_refiller = dcache_refiller = SimpleCommonBusDataCacheRefiller( + dcache_layouts, self.gen_params.dcache_params, self.bus + ) + m.submodules.cached_data_path = cached_data_path = DCache( + dcache_layouts, self.gen_params.dcache_params, dcache_refiller + ) + dcache_refiller.get_writeback_data.provide(cached_data_path.provide_writeback_data) + else: + m.submodules.cached_data_path = cached_data_path = DCacheBypass( + dcache_layouts, self.gen_params.dcache_params, self.bus + ) + + m.submodules.mmio_data_path = mmio_data_path = DCacheBypass( + dcache_layouts, self.gen_params.dcache_params, self.bus + ) + m.submodules.data_path_router = data_path_router = LSUDataPathRouter( + self.gen_params, cached_data_path, mmio_data_path + ) + m.submodules.requester = requester = LSURequester(self.gen_params, data_path_router) request_layout = make_layout( ("data", self.fu_layouts.issue), diff --git a/coreblocks/func_blocks/fu/lsu/lsu_requester.py b/coreblocks/func_blocks/fu/lsu/lsu_requester.py index 86faa5085..188000d93 100644 --- a/coreblocks/func_blocks/fu/lsu/lsu_requester.py +++ b/coreblocks/func_blocks/fu/lsu/lsu_requester.py @@ -7,15 +7,14 @@ from coreblocks.params import * from coreblocks.arch import Funct3, ExceptionCause -from coreblocks.peripherals.bus_adapter import BusMasterInterface +from coreblocks.cache.iface import CacheInterface from coreblocks.interface.layouts import LSULayouts class LSURequester(Elaboratable): """ - Bus request logic for the load/store unit. 
Its job is to interface - between the LSU and the bus. - + Memory request logic for the load/store unit. Its job is to interface + between the LSU and the data cache. Attributes ---------- issue : Method @@ -24,20 +23,20 @@ class LSURequester(Elaboratable): Retrieves a result from the bus. """ - def __init__(self, gen_params: GenParams, bus: BusMasterInterface, depth: int = 4) -> None: + def __init__(self, gen_params: GenParams, cache: CacheInterface, depth: int = 4) -> None: """ Parameters ---------- gen_params : GenParams Parameters to be used during processor generation. - bus : BusMasterInterface - An instance of the bus master for interfacing with the data bus. + cache : CacheInterface + Data-cache-like interface used for memory requests. depth : int Number of requests which can be send to memory, before it provides first response. Describe the resiliency of `LSURequester` to latency of memory in case when memory is fully pipelined. """ self.gen_params = gen_params - self.bus = bus + self.cache = cache self.depth = depth lsu_layouts = gen_params.get(LSULayouts) @@ -48,7 +47,7 @@ def __init__(self, gen_params: GenParams, bus: BusMasterInterface, depth: int = self.log = HardwareLogger("backend.lsu.requester") def prepare_bytes_mask(self, m: ModuleLike, funct3: Value, addr: Value) -> Signal: - mask_len = self.gen_params.isa.xlen // self.bus.params.granularity + mask_len = self.gen_params.isa.xlen // 8 mask = Signal(mask_len) with m.Switch(funct3): with m.Case(Funct3.B, Funct3.BU): @@ -130,14 +129,11 @@ def _(addr: Value, data: Value, funct3: Value, store: Value): ) with condition(m, nonblocking=True) as branch: - with branch(aligned & store): - self.bus.request_write(m, addr=addr >> 2, data=bus_data, sel=bytes_mask) - with branch(aligned & ~store): - self.bus.request_read(m, addr=addr >> 2, sel=bytes_mask) - - with m.If(aligned): - args_fifo.write(m, addr=addr, funct3=funct3, store=store) - with m.Else(): + with branch(aligned): + self.cache.issue_req(m, 
class DCacheLayouts:
    """Layouts used in the data cache."""

    def __init__(self, gen_params: GenParams):
        fields = gen_params.get(CommonLayoutFields)

        self.store: LayoutListField = ("store", 1)
        """Request is a store operation."""

        self.byte_mask: LayoutListField = ("byte_mask", gen_params.isa.xlen // 8)
        """Byte-enable mask; each bit corresponds to one byte of the word.
        Selects the bytes written by a store and the bytes requested by an
        uncached (bypassed) load."""

        self.last: LayoutListField = ("last", 1)
        """Last word in a cache line burst transfer."""

        self.issue_req = make_layout(
            fields.addr,
            fields.data,
            self.byte_mask,
            self.store,
        )
        """Layout of a data cache request (load or store)."""

        self.accept_res = make_layout(
            fields.data,
            fields.error,
        )
        """Layout of a data cache response."""

        self.start_refill = make_layout(fields.addr)
        """Layout for starting a cache line refill."""

        self.accept_refill = make_layout(
            fields.addr,
            fields.data,
            fields.error,
            self.last,
        )
        """Layout of a single refill beat (one word of the refilled line)."""

        self.start_writeback = make_layout(fields.addr)
        """Layout for starting a dirty line writeback."""

        self.provide_writeback_data = make_layout(
            fields.data,
        )
        """Layout of a single word of writeback data."""

        self.accept_writeback = make_layout(fields.error)
        """Layout of a writeback completion response."""
class DCacheParameters:
    """Parameters of the Data Cache.

    Derives the cache geometry (set count, line size, address field split)
    from the raw configuration and validates that the configuration is
    internally consistent.

    Parameters
    ----------
    addr_width : int
        Length of addresses used in the cache (in bits).
    word_width : int
        Length of the machine word (in bits). Must be a multiple of 8.
    num_of_ways : int
        Associativity of the cache. Must be at least 1.
    num_of_sets_bits : int
        Log of the number of cache sets.
    line_bytes_log : int
        Log of the size of a single cache line in bytes. A line must hold
        at least one full machine word.
    request_depth : int
        Number of requests accepted by the public D-cache interface.
    enable : bool
        Enable the data cache. If disabled, requests are bypassed to the bus.

    Raises
    ------
    ValueError
        If the configuration is inconsistent (see parameter constraints).
    """

    def __init__(
        self,
        *,
        addr_width,
        word_width,
        num_of_ways,
        num_of_sets_bits,
        line_bytes_log,
        request_depth=4,
        enable=True,
    ):
        # Validate up front: inconsistent parameters would otherwise produce
        # silently-wrong derived values (e.g. words_in_line == 0).
        if word_width % 8 != 0:
            raise ValueError("word_width must be a multiple of 8")
        if 2**line_bytes_log < word_width // 8:
            raise ValueError("A cache line must hold at least one machine word")
        if num_of_ways < 1:
            raise ValueError("num_of_ways must be at least 1")
        if request_depth < 1:
            raise ValueError("request_depth must be at least 1")

        self.addr_width = addr_width
        self.word_width = word_width
        self.num_of_ways = num_of_ways
        self.num_of_sets_bits = num_of_sets_bits
        self.line_bytes_log = line_bytes_log
        self.request_depth = request_depth
        self.enable = enable

        # Derived geometry.
        self.num_of_sets = 2**num_of_sets_bits
        self.line_size_bytes = 2**line_bytes_log

        self.word_width_bytes = word_width // 8

        # Address split (MSB to LSB): | tag | index | offset |
        self.offset_bits = line_bytes_log
        self.index_bits = num_of_sets_bits
        self.tag_bits = self.addr_width - self.offset_bits - self.index_bits

        if self.tag_bits <= 0:
            raise ValueError("addr_width too small for the requested index and offset widths")

        self.index_start_bit = self.offset_bits
        self.index_end_bit = self.offset_bits + self.index_bits - 1

        self.words_in_line = self.line_size_bytes // self.word_width_bytes
import deque + +from amaranth import Elaboratable +from amaranth.utils import exact_log2 + +from transactron.lib import Adapter, AdapterTrans +from transactron.testing import CallTrigger, TestCaseWithSimulator, TestbenchIO, def_method_mock, TestbenchContext +from transactron.testing.method_mock import MethodMock +from transactron.utils import ModuleConnector + +from coreblocks.cache.dcache import DCache, DCacheBypass +from coreblocks.cache.refiller import SimpleCommonBusDataCacheRefiller +from coreblocks.cache.iface import DataCacheRefillerInterface +from coreblocks.interface.layouts import DCacheLayouts +from coreblocks.params import GenParams +from coreblocks.params.configurations import test_core_config + +from ..peripherals.bus_mock import BusMockParameters, MockMasterAdapter + + +class SimpleCommonBusDataCacheRefillerTestCircuit(Elaboratable): + def __init__(self, gen_params: GenParams): + self.gen_params = gen_params + self.cp = self.gen_params.dcache_params + + def elaborate(self, platform): + layouts = self.gen_params.get(DCacheLayouts) + bus_mock_params = BusMockParameters( + data_width=self.gen_params.isa.xlen, + addr_width=self.gen_params.wb_params.addr_width, + ) + + self.bus_master_adapter = MockMasterAdapter(bus_mock_params) + self.refiller = SimpleCommonBusDataCacheRefiller(layouts, self.cp, self.bus_master_adapter) + + self.writeback_data_mock = TestbenchIO(Adapter(o=layouts.provide_writeback_data)) + self.refiller.get_writeback_data.provide(self.writeback_data_mock.adapter.iface) + + self.start_refill = TestbenchIO(AdapterTrans.create(self.refiller.start_refill)) + self.accept_refill = TestbenchIO(AdapterTrans.create(self.refiller.accept_refill)) + self.start_writeback = TestbenchIO(AdapterTrans.create(self.refiller.start_writeback)) + self.accept_writeback = TestbenchIO(AdapterTrans.create(self.refiller.accept_writeback)) + + return ModuleConnector( + bus_master_adapter=self.bus_master_adapter, + refiller=self.refiller, + 
writeback_data_mock=self.writeback_data_mock, + start_refill=self.start_refill, + accept_refill=self.accept_refill, + start_writeback=self.start_writeback, + accept_writeback=self.accept_writeback, + ) + + +class TestSimpleCommonBusDataCacheRefiller(TestCaseWithSimulator): + def setup_method(self) -> None: + self.gen_params = GenParams( + test_core_config.replace( + xlen=32, + dcache_line_bytes_log=4, + ) + ) + self.cp = self.gen_params.dcache_params + self.m = SimpleCommonBusDataCacheRefillerTestCircuit(self.gen_params) + self.writeback_words = deque() + + @def_method_mock(lambda self: self.m.writeback_data_mock, enable=lambda self: bool(self.writeback_words)) + def writeback_data(self): + @MethodMock.effect + def eff(): + self.writeback_words.popleft() + + return {"data": self.writeback_words[0]} + + def bus_word_addr(self, byte_addr: int, word_idx: int) -> int: + return (byte_addr >> exact_log2(self.cp.word_width_bytes)) + word_idx + + def byte_word_addr(self, byte_addr: int, word_idx: int) -> int: + return byte_addr + word_idx * self.cp.word_width_bytes + + def full_sel(self) -> int: + return (1 << self.cp.word_width_bytes) - 1 + + def test_refill_reads_full_line_and_emits_word_beats(self): + async def process(sim: TestbenchContext): + base_addr = 0x00000100 + words = [0x10203040, 0x50607080, 0x90A0B0C0, 0xD0E0F000] + + await self.m.start_refill.call(sim, addr=base_addr) + + for word_idx, word in enumerate(words): + req = await self.m.bus_master_adapter.request_read_mock.call(sim) + assert req["addr"] == self.bus_word_addr(base_addr, word_idx) + assert req["sel"] == self.full_sel() + + await self.m.bus_master_adapter.get_read_response_mock.call(sim, data=word, err=0) + resp = await self.m.accept_refill.call(sim) + + assert resp["addr"] == self.byte_word_addr(base_addr, word_idx) + assert resp["data"] == word + assert resp["error"] == 0 + assert resp["last"] == int(word_idx == len(words) - 1) + + with self.run_simulation(self.m) as sim: + 
sim.add_testbench(process) + + def test_refill_error_returns_error_last_and_stops(self): + async def process(sim: TestbenchContext): + base_addr = 0x00000140 + words = [0x11111111, 0x22222222] + + await self.m.start_refill.call(sim, addr=base_addr) + + req = await self.m.bus_master_adapter.request_read_mock.call(sim) + assert req["addr"] == self.bus_word_addr(base_addr, 0) + await self.m.bus_master_adapter.get_read_response_mock.call(sim, data=words[0], err=0) + + resp = await self.m.accept_refill.call(sim) + assert resp["addr"] == self.byte_word_addr(base_addr, 0) + assert resp["data"] == words[0] + assert resp["error"] == 0 + assert resp["last"] == 0 + + req = await self.m.bus_master_adapter.request_read_mock.call(sim) + assert req["addr"] == self.bus_word_addr(base_addr, 1) + await self.m.bus_master_adapter.get_read_response_mock.call(sim, data=words[1], err=1) + + resp = await self.m.accept_refill.call(sim) + assert resp["addr"] == self.byte_word_addr(base_addr, 1) + assert resp["data"] == words[1] + assert resp["error"] == 1 + assert resp["last"] == 1 + + for _ in range(3): + req = await self.m.bus_master_adapter.request_read_mock.call_try(sim) + assert req is None + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + def test_writeback_writes_full_line_and_returns_success(self): + async def process(sim: TestbenchContext): + base_addr = 0x00000200 + words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + self.writeback_words.extend(words) + + await self.m.start_writeback.call(sim, addr=base_addr) + + for word_idx, word in enumerate(words): + req = await self.m.bus_master_adapter.request_write_mock.call(sim) + assert req["addr"] == self.bus_word_addr(base_addr, word_idx) + assert req["data"] == word + assert req["sel"] == self.full_sel() + + await self.m.bus_master_adapter.get_write_response_mock.call(sim, err=0) + + resp = await self.m.accept_writeback.call(sim) + assert resp["error"] == 0 + assert not self.writeback_words + + with 
self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + def test_writeback_accumulates_error(self): + async def process(sim: TestbenchContext): + base_addr = 0x00000240 + words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + errors = [0, 1, 0, 0] + self.writeback_words.extend(words) + + await self.m.start_writeback.call(sim, addr=base_addr) + + for word_idx, word in enumerate(words): + req = await self.m.bus_master_adapter.request_write_mock.call(sim) + assert req["addr"] == self.bus_word_addr(base_addr, word_idx) + assert req["data"] == word + + await self.m.bus_master_adapter.get_write_response_mock.call(sim, err=errors[word_idx]) + + resp = await self.m.accept_writeback.call(sim) + assert resp["error"] == 1 + assert not self.writeback_words + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + def test_start_methods_are_not_ready_while_busy(self): + async def process(sim: TestbenchContext): + refill_addr = 0x00000300 + writeback_addr = 0x00000340 + words = [0xA0A0A0A0, 0xB1B1B1B1, 0xC2C2C2C2, 0xD3D3D3D3] + + await self.m.start_refill.call(sim, addr=refill_addr) + ret = await self.m.start_writeback.call_try(sim, addr=writeback_addr) + assert ret is None + + for word_idx, word in enumerate(words): + await self.m.bus_master_adapter.request_read_mock.call(sim) + await self.m.bus_master_adapter.get_read_response_mock.call(sim, data=word, err=0) + resp = await self.m.accept_refill.call(sim) + assert resp["last"] == int(word_idx == len(words) - 1) + + self.writeback_words.extend(words) + await self.m.start_writeback.call(sim, addr=writeback_addr) + ret = await self.m.start_refill.call_try(sim, addr=refill_addr) + assert ret is None + + for _ in words: + await self.m.bus_master_adapter.request_write_mock.call(sim) + await self.m.bus_master_adapter.get_write_response_mock.call(sim, err=0) + + resp = await self.m.accept_writeback.call(sim) + assert resp["error"] == 0 + + ret = await self.m.start_refill.call_try(sim, 
addr=refill_addr) + assert ret is not None + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + +class DCacheBypassTestCircuit(Elaboratable): + def __init__(self, gen_params: GenParams): + self.gen_params = gen_params + self.cp = self.gen_params.dcache_params + + def elaborate(self, platform): + layouts = self.gen_params.get(DCacheLayouts) + bus_mock_params = BusMockParameters( + data_width=self.gen_params.isa.xlen, + addr_width=self.gen_params.wb_params.addr_width, + ) + + self.bus_master_adapter = MockMasterAdapter(bus_mock_params) + self.cache = DCacheBypass(layouts, self.cp, self.bus_master_adapter) + self.issue_req = TestbenchIO(AdapterTrans.create(self.cache.issue_req)) + self.accept_res = TestbenchIO(AdapterTrans.create(self.cache.accept_res)) + self.flush_cache = TestbenchIO(AdapterTrans.create(self.cache.flush)) + + return ModuleConnector( + bus_master_adapter=self.bus_master_adapter, + cache=self.cache, + issue_req=self.issue_req, + accept_res=self.accept_res, + flush_cache=self.flush_cache, + ) + + +class TestDCacheBypass(TestCaseWithSimulator): + def setup_method(self) -> None: + self.gen_params = GenParams( + test_core_config.replace( + xlen=32, + dcache_line_bytes_log=4, + ) + ) + self.cp = self.gen_params.dcache_params + self.m = DCacheBypassTestCircuit(self.gen_params) + + def test_load(self): + async def process(sim: TestbenchContext): + byte_addr = 0x00000114 + data = 0x11223344 + byte_mask = 0b0110 + + _, req = await ( + CallTrigger(sim) + .call(self.m.issue_req, addr=byte_addr, data=0, byte_mask=byte_mask, store=0) + .call(self.m.bus_master_adapter.request_read_mock) + ) + + assert req["addr"] == byte_addr >> exact_log2(self.cp.word_width_bytes) + assert req["sel"] == byte_mask + + _, resp = await ( + CallTrigger(sim) + .call(self.m.bus_master_adapter.get_read_response_mock, data=data, err=0) + .call(self.m.accept_res) + ) + + assert resp["data"] == data + assert resp["error"] == 0 + + with self.run_simulation(self.m) as 
sim: + sim.add_testbench(process) + + def test_store(self): + async def process(sim: TestbenchContext): + byte_addr = 0x00000118 + data = 0xAABBCCDD + byte_mask = 0b1100 + + _, req = await ( + CallTrigger(sim) + .call(self.m.issue_req, addr=byte_addr, data=data, byte_mask=byte_mask, store=1) + .call(self.m.bus_master_adapter.request_write_mock) + ) + + assert req["addr"] == byte_addr >> exact_log2(self.cp.word_width_bytes) + assert req["data"] == data + assert req["sel"] == byte_mask + + _, resp = await ( + CallTrigger(sim).call(self.m.bus_master_adapter.get_write_response_mock, err=0).call(self.m.accept_res) + ) + + assert resp["data"] == 0 + assert resp["error"] == 0 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + def test_error(self): + async def process(sim: TestbenchContext): + byte_addr = 0x0000011C + + await ( + CallTrigger(sim) + .call(self.m.issue_req, addr=byte_addr, data=0, byte_mask=0b1111, store=0) + .call(self.m.bus_master_adapter.request_read_mock) + ) + + _, resp = await ( + CallTrigger(sim) + .call(self.m.bus_master_adapter.get_read_response_mock, data=0, err=1) + .call(self.m.accept_res) + ) + + assert resp["data"] == 0 + assert resp["error"] == 1 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + def test_queue_order(self): + async def process(sim: TestbenchContext): + base_addr = 0x00000200 + words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + + for word_idx in range(self.cp.request_depth): + _, req = await ( + CallTrigger(sim) + .call( + self.m.issue_req, + addr=base_addr + word_idx * self.cp.word_width_bytes, + data=0, + byte_mask=0b1111, + store=0, + ) + .call(self.m.bus_master_adapter.request_read_mock) + ) + assert req["addr"] == (base_addr >> exact_log2(self.cp.word_width_bytes)) + word_idx + + for word in words: + _, resp = await ( + CallTrigger(sim) + .call(self.m.bus_master_adapter.get_read_response_mock, data=word, err=0) + .call(self.m.accept_res) + ) + assert 
resp["data"] == word + assert resp["error"] == 0 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(process) + + +class MockedDataCacheRefiller(Elaboratable, DataCacheRefillerInterface): + def __init__(self, gen_params: GenParams): + layouts = gen_params.get(DCacheLayouts) + + self.start_refill_mock = TestbenchIO(Adapter(i=layouts.start_refill)) + self.accept_refill_mock = TestbenchIO(Adapter(o=layouts.accept_refill)) + self.start_writeback_mock = TestbenchIO(Adapter(i=layouts.start_writeback)) + self.accept_writeback_mock = TestbenchIO(Adapter(o=layouts.accept_writeback)) + + self.start_refill = self.start_refill_mock.adapter.iface + self.accept_refill = self.accept_refill_mock.adapter.iface + self.start_writeback = self.start_writeback_mock.adapter.iface + self.accept_writeback = self.accept_writeback_mock.adapter.iface + + def elaborate(self, platform): + return ModuleConnector( + start_refill=self.start_refill_mock, + accept_refill=self.accept_refill_mock, + start_writeback=self.start_writeback_mock, + accept_writeback=self.accept_writeback_mock, + ) + + +class DCacheTestCircuit(Elaboratable): + def __init__(self, gen_params: GenParams): + self.gen_params = gen_params + self.cp = self.gen_params.dcache_params + + def elaborate(self, platform): + self.refiller = MockedDataCacheRefiller(self.gen_params) + self.cache = DCache(self.gen_params.get(DCacheLayouts), self.cp, self.refiller) + self.issue_req = TestbenchIO(AdapterTrans.create(self.cache.issue_req)) + self.accept_res = TestbenchIO(AdapterTrans.create(self.cache.accept_res)) + self.flush_cache = TestbenchIO(AdapterTrans.create(self.cache.flush)) + self.provide_writeback_data = TestbenchIO(AdapterTrans.create(self.cache.provide_writeback_data)) + + return ModuleConnector( + refiller=self.refiller, + cache=self.cache, + issue_req=self.issue_req, + accept_res=self.accept_res, + flush_cache=self.flush_cache, + provide_writeback_data=self.provide_writeback_data, + ) + + +class 
TestDCache(TestCaseWithSimulator): + def setup_method(self) -> None: + self.gen_params = GenParams( + test_core_config.replace( + xlen=32, + dcache_ways=2, + dcache_sets_bits=2, + dcache_line_bytes_log=4, + ) + ) + self.cp = self.gen_params.dcache_params + self.m = DCacheTestCircuit(self.gen_params) + self.refill_start_calls = deque() + self.refill_responses = deque() + self.writeback_start_calls = deque() + self.writeback_accept_responses = deque() + self.allow_writeback_accept = False + + @def_method_mock(lambda self: self.m.refiller.start_refill_mock, enable=lambda self: True) + def start_refill_unexpected(self, addr): + @MethodMock.effect + def eff(): + self.refill_start_calls.append(addr) + if not self.refill_responses: + self.refill_responses.append({"addr": addr, "data": 0, "error": 1, "last": 1}) + + @def_method_mock(lambda self: self.m.refiller.accept_refill_mock, enable=lambda self: True) + def accept_refill_unexpected(self): + @MethodMock.effect + def eff(): + if not self.refill_responses: + raise AssertionError("unexpected accept_refill call") + self.refill_responses.popleft() + + if self.refill_responses: + return self.refill_responses[0] + return {"addr": 0, "data": 0, "error": 0, "last": 1} + + @def_method_mock(lambda self: self.m.refiller.start_writeback_mock, enable=lambda self: True) + def start_writeback_unexpected(self, addr): + @MethodMock.effect + def eff(): + self.writeback_start_calls.append(addr) + + @def_method_mock( + lambda self: self.m.refiller.accept_writeback_mock, + enable=lambda self: self.allow_writeback_accept and bool(self.writeback_accept_responses), + ) + def accept_writeback_unexpected(self): + @MethodMock.effect + def eff(): + if not self.writeback_accept_responses: + raise AssertionError("unexpected accept_writeback call") + self.writeback_accept_responses.popleft() + + return self.writeback_accept_responses[0] + + def split_addr(self, addr: int) -> tuple[int, int, int]: + index = (addr >> self.cp.offset_bits) & 
(self.cp.num_of_sets - 1) + tag = addr >> (self.cp.offset_bits + self.cp.index_bits) + word_offset = (addr & (self.cp.line_size_bytes - 1)) >> exact_log2(self.cp.word_width_bytes) + return tag, index, word_offset + + def encode_tag_entry(self, *, valid: int, dirty: int, tag: int) -> dict[str, int]: + return {"valid": valid, "dirty": dirty, "tag": tag} + + def line_word_addr(self, index: int, word_offset: int) -> int: + return (index << exact_log2(self.cp.words_in_line)) | word_offset + + def merge_word(self, initial: int, new: int, byte_mask: int) -> int: + result = initial + for byte in range(self.cp.word_width_bytes): + if byte_mask & (1 << byte): + byte_shift = byte * 8 + result &= ~(0xFF << byte_shift) + result |= ((new >> byte_shift) & 0xFF) << byte_shift + return result + + async def wait_for_flush(self, sim: TestbenchContext): + for _ in range(self.cp.num_of_sets * 3 + 4): + await sim.tick() + + async def load_line_directly( + self, sim: TestbenchContext, addr_base: int, words: list[int], *, way: int = 0, dirty: int = 0 + ): + tag, index, _ = self.split_addr(addr_base) + sim.set( + self.m.cache.mem.tag_mems[way].data[index], # type: ignore[arg-type] + self.encode_tag_entry(valid=1, dirty=dirty, tag=tag), + ) + + for word_offset, word in enumerate(words): + mem_addr = self.line_word_addr(index, word_offset) + sim.set(self.m.cache.mem.data_mems[way].data[mem_addr], word) # type: ignore[arg-type] + + await sim.tick() + + async def call_cache(self, sim: TestbenchContext, *, addr: int, data: int = 0, byte_mask: int = 0, store: int = 0): + await self.m.issue_req.call(sim, addr=addr, data=data, byte_mask=byte_mask, store=store) + return await self.m.accept_res.call(sim) + + def queue_refill_line(self, line_addr: int, words: list[int], *, error: int = 0): + for i, word in enumerate(words): + self.refill_responses.append( + { + "addr": line_addr + i * self.cp.word_width_bytes, + "data": word, + "error": error, + "last": int(i == len(words) - 1 or error), + } + ) + if 
error: + break + + async def collect_writeback_line(self, sim: TestbenchContext, *, words_in_line: int) -> list[int]: + words = [] + await sim.tick() + for _ in range(words_in_line): + resp = await self.m.provide_writeback_data.call(sim) + words.append(resp["data"]) + await sim.tick() + return words + + async def wait_until(self, sim: TestbenchContext, pred, *, max_ticks: int = 50): + for _ in range(max_ticks): + if pred(): + return + await sim.tick() + raise AssertionError("condition not met in time") + + def read_tag_entry(self, sim: TestbenchContext, *, way: int, index: int) -> dict[str, int]: + raw_tag = sim.get(self.m.cache.mem.tag_mems[way].data[index]) # type: ignore[arg-type] + return { + "valid": raw_tag["valid"], + "dirty": raw_tag["dirty"], + "tag": raw_tag["tag"], + } + + def read_data_word(self, sim: TestbenchContext, *, way: int, index: int, word_offset: int) -> int: + mem_addr = self.line_word_addr(index, word_offset) + return sim.get(self.m.cache.mem.data_mems[way].data[mem_addr]) # type: ignore[arg-type] + + def same_set_addr(self, addr: int, distance: int) -> int: + return addr + distance * self.cp.num_of_sets * self.cp.line_size_bytes + + def test_miss_returns_error_on_empty_refiller(self): + async def cache_process(sim: TestbenchContext): + await self.wait_for_flush(sim) + + resp = await self.call_cache(sim, addr=0x00000100) + + assert resp["error"] == 1 + assert resp["data"] == 0 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_load_hit(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000120 + words = [0x11223344, 0x55667788, 0x99AABBCC, 0xDDEEFF00] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, words, way=0, dirty=0) + + resp = await self.call_cache(sim, addr=base_addr + self.cp.word_width_bytes) + + assert resp["error"] == 0 + assert resp["data"] == words[1] + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + 
+ def test_store_hit(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000140 + initial_words = [0x11223344, 0x55667788, 0x99AABBCC, 0xDDEEFF00] + store_addr = base_addr + self.cp.word_width_bytes + store_data = 0xAABBCCDD + byte_mask = 0b0101 + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, initial_words, way=0, dirty=0) + + resp = await self.call_cache(sim, addr=store_addr, data=store_data, byte_mask=byte_mask, store=1) + + assert resp["error"] == 0 + assert resp["data"] == 0 + + await sim.tick() + + tag, index, word_offset = self.split_addr(store_addr) + expected_word = self.merge_word(initial_words[1], store_data, byte_mask) + + stored_word = self.read_data_word(sim, way=0, index=index, word_offset=word_offset) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + + assert stored_word == expected_word + assert stored_tag["valid"] == 1 + assert stored_tag["dirty"] == 1 + assert stored_tag["tag"] == tag + + load_resp = await self.call_cache(sim, addr=store_addr) + assert load_resp["error"] == 0 + assert load_resp["data"] == expected_word + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_requests_queued(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000180 + words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, words, way=0, dirty=0) + + await self.m.issue_req.call(sim, addr=base_addr, data=0, byte_mask=0, store=0) + + ret = await self.m.issue_req.call_try( + sim, addr=base_addr + self.cp.word_width_bytes, data=0, byte_mask=0, store=0 + ) + assert ret is not None + + first_resp = await self.m.accept_res.call(sim) + assert first_resp["error"] == 0 + assert first_resp["data"] == words[0] + + second_resp = await self.m.accept_res.call(sim) + assert second_resp["error"] == 0 + assert second_resp["data"] == words[1] + + with 
self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_request_queue_full(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000180 + words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, words, way=0, dirty=0) + + for word_offset in range(self.cp.request_depth): + await self.m.issue_req.call( + sim, + addr=base_addr + (word_offset % self.cp.words_in_line) * self.cp.word_width_bytes, + data=0, + byte_mask=0, + store=0, + ) + + ret = await self.m.issue_req.call_try(sim, addr=base_addr, data=0, byte_mask=0, store=0) + assert ret is None + + for word_offset in range(self.cp.request_depth): + resp = await self.m.accept_res.call(sim) + assert resp["error"] == 0 + assert resp["data"] == words[word_offset % self.cp.words_in_line] + + ret = await self.m.issue_req.call_try(sim, addr=base_addr, data=0, byte_mask=0, store=0) + assert ret is not None + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_flush_invalidates(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x000001C0 + words = [0xCAFEBABE, 0x0BADF00D, 0x12345678, 0x89ABCDEF] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, words, way=0, dirty=0) + + hit_resp = await self.call_cache(sim, addr=base_addr) + assert hit_resp["error"] == 0 + assert hit_resp["data"] == words[0] + + await self.m.flush_cache.call(sim) + await self.wait_for_flush(sim) + + miss_resp = await self.call_cache(sim, addr=base_addr) + assert miss_resp["error"] == 1 + assert miss_resp["data"] == 0 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_load_miss(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000200 + words = [0xAAAABBBB, 0xCCCCDDDD, 0x11112222, 0x33334444] + + await self.wait_for_flush(sim) + self.queue_refill_line(base_addr, words) + + 
resp = await self.call_cache(sim, addr=base_addr + self.cp.word_width_bytes) + + assert list(self.refill_start_calls) == [base_addr] + assert resp["error"] == 0 + assert resp["data"] == words[1] + assert not self.refill_responses + + hit_resp = await self.call_cache(sim, addr=base_addr + 2 * self.cp.word_width_bytes) + assert hit_resp["error"] == 0 + assert hit_resp["data"] == words[2] + assert list(self.refill_start_calls) == [base_addr] + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_store_miss(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000240 + initial_words = [0x10203040, 0x50607080, 0x90A0B0C0, 0xD0E0F000] + store_addr = base_addr + self.cp.word_width_bytes + store_data = 0x11223344 + byte_mask = 0b0011 + + await self.wait_for_flush(sim) + self.queue_refill_line(base_addr, initial_words) + + resp = await self.call_cache(sim, addr=store_addr, data=store_data, byte_mask=byte_mask, store=1) + + assert list(self.refill_start_calls) == [base_addr] + assert resp["error"] == 0 + assert resp["data"] == 0 + assert not self.refill_responses + + await sim.tick() + + tag, index, word_offset = self.split_addr(store_addr) + expected_word = self.merge_word(initial_words[1], store_data, byte_mask) + stored_word = self.read_data_word(sim, way=0, index=index, word_offset=word_offset) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + + assert stored_word == expected_word + assert stored_tag["valid"] == 1 + assert stored_tag["dirty"] == 1 + assert stored_tag["tag"] == tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_refill_with_queued_requests(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000260 + words = [0x10203040, 0x50607080, 0x90A0B0C0, 0xD0E0F000] + + await self.wait_for_flush(sim) + self.queue_refill_line(base_addr, words) + + await self.m.issue_req.call( + sim, + addr=base_addr + self.cp.word_width_bytes, + 
data=0, + byte_mask=0, + store=0, + ) + ret = await self.m.issue_req.call_try( + sim, + addr=base_addr + 2 * self.cp.word_width_bytes, + data=0, + byte_mask=0, + store=0, + ) + assert ret is not None + + first_resp = await self.m.accept_res.call(sim) + second_resp = await self.m.accept_res.call(sim) + + assert list(self.refill_start_calls) == [base_addr] + assert first_resp["error"] == 0 + assert first_resp["data"] == words[1] + assert second_resp["error"] == 0 + assert second_resp["data"] == words[2] + assert not self.refill_responses + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_refill_error(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000280 + words = [0xABCDEF01, 0x23456789, 0x3456789A, 0x456789AB] + + await self.wait_for_flush(sim) + self.queue_refill_line(base_addr, words, error=1) + + resp = await self.call_cache(sim, addr=base_addr) + + assert list(self.refill_start_calls) == [base_addr] + assert resp["error"] == 1 + assert resp["data"] == 0 + assert not self.refill_responses + + _, index, _ = self.split_addr(base_addr) + way0_entry = self.read_tag_entry(sim, way=0, index=index) + way1_entry = self.read_tag_entry(sim, way=1, index=index) + + assert way0_entry["valid"] == 0 + assert way1_entry["valid"] == 0 + assert not self.writeback_start_calls + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_load_miss_multiple_lines(self): + async def cache_process(sim: TestbenchContext): + old_base_addr = 0x00000100 + old_words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + other_base_addr = 0x00000140 + other_words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + new_base_addr = 0x00000200 + new_words = [0xAAAABBBB, 0xCCCCDDDD, 0xEEEEFFFF, 0x12345678] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, old_base_addr, old_words, way=0, dirty=1) + await self.load_line_directly(sim, other_base_addr, other_words, way=1, 
dirty=0) + self.queue_refill_line(new_base_addr, new_words) + self.writeback_accept_responses.append({"error": 0}) + + await self.m.issue_req.call( + sim, addr=new_base_addr + self.cp.word_width_bytes, data=0, byte_mask=0, store=0 + ) + + await self.wait_until(sim, lambda: len(self.writeback_start_calls) == 1) + assert list(self.writeback_start_calls) == [old_base_addr] + assert not self.refill_start_calls + + written_back_words = await self.collect_writeback_line(sim, words_in_line=self.cp.words_in_line) + assert written_back_words == old_words + assert not self.refill_start_calls + + self.allow_writeback_accept = True + resp = await self.m.accept_res.call(sim) + + assert list(self.refill_start_calls) == [new_base_addr] + assert resp["error"] == 0 + assert resp["data"] == new_words[1] + assert not self.refill_responses + + _, index, _ = self.split_addr(new_base_addr) + new_tag, _, _ = self.split_addr(new_base_addr) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + hit_resp = await self.call_cache(sim, addr=new_base_addr + 2 * self.cp.word_width_bytes) + + assert stored_tag["valid"] == 1 + assert stored_tag["tag"] == new_tag + assert hit_resp["error"] == 0 + assert hit_resp["data"] == new_words[2] + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_store_miss_multiple_lines(self): + async def cache_process(sim: TestbenchContext): + old_base_addr = 0x00000140 + old_words = [0xCAFEBABE, 0x0BADF00D, 0x01020304, 0xA0B0C0D0] + other_base_addr = 0x00000100 + other_words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + new_base_addr = 0x00000240 + new_words = [0x10203040, 0x50607080, 0x90A0B0C0, 0xD0E0F000] + store_addr = new_base_addr + self.cp.word_width_bytes + store_data = 0x11223344 + byte_mask = 0b0011 + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, old_base_addr, old_words, way=0, dirty=1) + await self.load_line_directly(sim, other_base_addr, other_words, way=1, dirty=0) + 
self.queue_refill_line(new_base_addr, new_words) + self.writeback_accept_responses.append({"error": 0}) + + await self.m.issue_req.call(sim, addr=store_addr, data=store_data, byte_mask=byte_mask, store=1) + + await self.wait_until(sim, lambda: len(self.writeback_start_calls) == 1) + assert list(self.writeback_start_calls) == [old_base_addr] + assert not self.refill_start_calls + + written_back_words = await self.collect_writeback_line(sim, words_in_line=self.cp.words_in_line) + assert written_back_words == old_words + assert not self.refill_start_calls + + self.allow_writeback_accept = True + resp = await self.m.accept_res.call(sim) + + assert list(self.refill_start_calls) == [new_base_addr] + assert resp["error"] == 0 + assert resp["data"] == 0 + assert not self.refill_responses + + await sim.tick() + + new_tag, index, word_offset = self.split_addr(store_addr) + expected_word = self.merge_word(new_words[1], store_data, byte_mask) + stored_word = self.read_data_word(sim, way=0, index=index, word_offset=word_offset) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + + assert stored_word == expected_word + assert stored_tag["valid"] == 1 + assert stored_tag["dirty"] == 1 + assert stored_tag["tag"] == new_tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_miss_writeback_error(self): + async def cache_process(sim: TestbenchContext): + old_base_addr = 0x00000100 + old_words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + other_base_addr = 0x00000140 + other_words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + new_base_addr = 0x00000200 + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, old_base_addr, old_words, way=0, dirty=1) + await self.load_line_directly(sim, other_base_addr, other_words, way=1, dirty=0) + self.writeback_accept_responses.append({"error": 1}) + + await self.m.issue_req.call(sim, addr=new_base_addr, data=0, byte_mask=0, store=0) + + await self.wait_until(sim, 
lambda: len(self.writeback_start_calls) == 1) + assert list(self.writeback_start_calls) == [old_base_addr] + assert not self.refill_start_calls + + written_back_words = await self.collect_writeback_line(sim, words_in_line=self.cp.words_in_line) + assert written_back_words == old_words + + self.allow_writeback_accept = True + resp = await self.m.accept_res.call(sim) + + assert resp["error"] == 1 + assert resp["data"] == 0 + assert not self.refill_start_calls + + old_tag, index, _ = self.split_addr(old_base_addr) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + assert stored_tag["valid"] == 1 + assert stored_tag["dirty"] == 1 + assert stored_tag["tag"] == old_tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_flush_same_set(self): + async def cache_process(sim: TestbenchContext): + first_cache_line_addr = 0x00000100 + first_words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + second_cache_line_addr = 0x00000200 + second_words = [0xDEADBEE9, 0x11223349, 0x55667789, 0x99AABBC9] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, first_cache_line_addr, first_words, way=0, dirty=1) + await self.load_line_directly(sim, second_cache_line_addr, second_words, way=1, dirty=0) + + self.writeback_accept_responses.append({"error": 0}) + + await self.m.flush_cache.call(sim) + await self.wait_until(sim, lambda: len(self.writeback_start_calls) == 1) + assert list(self.writeback_start_calls) == [first_cache_line_addr] + assert not self.refill_start_calls + + written_back_words = await self.collect_writeback_line(sim, words_in_line=self.cp.words_in_line) + assert written_back_words == first_words + + self.allow_writeback_accept = True + await self.wait_for_flush(sim) + + _, first_index, _ = self.split_addr(first_cache_line_addr) + _, second_index, _ = self.split_addr(second_cache_line_addr) + + first_tag = self.read_tag_entry(sim, way=0, index=first_index) + second_tag = self.read_tag_entry(sim, 
way=1, index=second_index) + + assert first_tag["valid"] == 0 + assert first_tag["dirty"] == 0 + assert second_tag["valid"] == 0 + assert second_tag["dirty"] == 0 + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_flush_writeback_error(self): + async def cache_process(sim: TestbenchContext): + base_addr = 0x00000100 + words = [0xDEADBEEF, 0x11223344, 0x55667788, 0x99AABBCC] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, base_addr, words, way=0, dirty=1) + self.writeback_accept_responses.append({"error": 1}) + + await self.m.flush_cache.call(sim) + await self.wait_until(sim, lambda: len(self.writeback_start_calls) == 1) + assert list(self.writeback_start_calls) == [base_addr] + + written_back_words = await self.collect_writeback_line(sim, words_in_line=self.cp.words_in_line) + assert written_back_words == words + + self.allow_writeback_accept = True + await self.wait_for_flush(sim) + + tag, index, _ = self.split_addr(base_addr) + stored_tag = self.read_tag_entry(sim, way=0, index=index) + + assert stored_tag["valid"] == 1 + assert stored_tag["dirty"] == 1 + assert stored_tag["tag"] == tag + assert not self.refill_start_calls + + hit_resp = await self.call_cache(sim, addr=base_addr) + assert hit_resp["error"] == 0 + assert hit_resp["data"] == words[0] + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_miss_prefers_invalid_way(self): + async def cache_process(sim: TestbenchContext): + way0_addr = 0x00000100 + refill_addr = self.same_set_addr(way0_addr, 1) + way0_words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + refill_words = [0xAABBCCDD, 0x10203040, 0x50607080, 0x90A0B0C0] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, way0_addr, way0_words, way=0, dirty=0) + self.queue_refill_line(refill_addr, refill_words) + + resp = await self.call_cache(sim, addr=refill_addr) + + assert resp["error"] == 0 + assert resp["data"] == 
refill_words[0] + assert list(self.refill_start_calls) == [refill_addr] + + way0_tag, index, _ = self.split_addr(way0_addr) + refill_tag, _, _ = self.split_addr(refill_addr) + way0_entry = self.read_tag_entry(sim, way=0, index=index) + way1_entry = self.read_tag_entry(sim, way=1, index=index) + + assert way0_entry["valid"] == 1 + assert way0_entry["tag"] == way0_tag + assert way1_entry["valid"] == 1 + assert way1_entry["tag"] == refill_tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_miss_uses_round_robin(self): + async def cache_process(sim: TestbenchContext): + way0_addr = 0x00000100 + way1_addr = self.same_set_addr(way0_addr, 1) + refill_addr = self.same_set_addr(way0_addr, 2) + way0_words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + way1_words = [0x41424344, 0x51525354, 0x61626364, 0x71727374] + refill_words = [0xAABBCCDD, 0x10203040, 0x50607080, 0x90A0B0C0] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, way0_addr, way0_words, way=0, dirty=0) + await self.load_line_directly(sim, way1_addr, way1_words, way=1, dirty=0) + self.queue_refill_line(refill_addr, refill_words) + + resp = await self.call_cache(sim, addr=refill_addr) + + assert resp["error"] == 0 + assert resp["data"] == refill_words[0] + assert list(self.refill_start_calls) == [refill_addr] + + refill_tag, index, _ = self.split_addr(refill_addr) + way1_tag, _, _ = self.split_addr(way1_addr) + way0_entry = self.read_tag_entry(sim, way=0, index=index) + way1_entry = self.read_tag_entry(sim, way=1, index=index) + + assert way0_entry["valid"] == 1 + assert way0_entry["tag"] == refill_tag + assert way1_entry["valid"] == 1 + assert way1_entry["tag"] == way1_tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) + + def test_round_robin_advances_only_if_used(self): + async def cache_process(sim: TestbenchContext): + way0_addr = 0x00000100 + way1_addr = self.same_set_addr(way0_addr, 1) + 
first_refill_addr = self.same_set_addr(way0_addr, 2) + second_refill_addr = self.same_set_addr(way0_addr, 3) + third_refill_addr = self.same_set_addr(way0_addr, 4) + way0_words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + first_refill_words = [0xA0A0A0A0, 0xA1A1A1A1, 0xA2A2A2A2, 0xA3A3A3A3] + way1_words = [0xB0B0B0B0, 0xB1B1B1B1, 0xB2B2B2B2, 0xB3B3B3B3] + second_refill_words = [0xC0C0C0C0, 0xC1C1C1C1, 0xC2C2C2C2, 0xC3C3C3C3] + third_refill_words = [0xD0D0D0D0, 0xD1D1D1D1, 0xD2D2D2D2, 0xD3D3D3D3] + + await self.wait_for_flush(sim) + await self.load_line_directly(sim, way0_addr, way0_words, way=0, dirty=0) + + self.queue_refill_line(first_refill_addr, first_refill_words) + first_resp = await self.call_cache(sim, addr=first_refill_addr) + assert first_resp["error"] == 0 + + _, index, _ = self.split_addr(way0_addr) + first_refill_tag, _, _ = self.split_addr(first_refill_addr) + assert self.read_tag_entry(sim, way=1, index=index)["tag"] == first_refill_tag + + await self.load_line_directly(sim, way1_addr, way1_words, way=1, dirty=0) + self.queue_refill_line(second_refill_addr, second_refill_words) + second_resp = await self.call_cache(sim, addr=second_refill_addr) + assert second_resp["error"] == 0 + + second_refill_tag, _, _ = self.split_addr(second_refill_addr) + assert self.read_tag_entry(sim, way=0, index=index)["tag"] == second_refill_tag + + self.queue_refill_line(third_refill_addr, third_refill_words) + third_resp = await self.call_cache(sim, addr=third_refill_addr) + assert third_resp["error"] == 0 + + third_refill_tag, _, _ = self.split_addr(third_refill_addr) + assert self.read_tag_entry(sim, way=1, index=index)["tag"] == third_refill_tag + + with self.run_simulation(self.m) as sim: + sim.add_testbench(cache_process) diff --git a/test/func_blocks/lsu/test_dummylsu.py b/test/func_blocks/lsu/test_dummylsu.py index 92514f992..99220b0e5 100644 --- a/test/func_blocks/lsu/test_dummylsu.py +++ b/test/func_blocks/lsu/test_dummylsu.py @@ -184,7 +184,7 @@ def 
generate_instr(self, max_reg_val, max_imm_val): def setup_method(self) -> None: random.seed(14) self.tests_number = 100 - self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=4)) + self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=4, dcache_enable=False)) self.test_module = DummyLSUTestCircuit(self.gen_params) self.instr_queue = deque() self.mem_data_queue = deque() @@ -296,7 +296,7 @@ def generate_instr(self, max_reg_val, max_imm_val): def setup_method(self) -> None: random.seed(14) - self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3)) + self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3, dcache_enable=False)) self.test_module = DummyLSUTestCircuit(self.gen_params) async def one_instr_test(self, sim: TestbenchContext): @@ -375,7 +375,7 @@ def generate_instr(self, max_reg_val, max_imm_val): def setup_method(self) -> None: random.seed(14) self.tests_number = 100 - self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3)) + self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3, dcache_enable=False)) self.test_module = DummyLSUTestCircuit(self.gen_params) self.instr_queue = deque() self.mem_data_queue = deque() @@ -462,7 +462,7 @@ async def process(self, sim: TestbenchContext): await self.push_one_instr(sim, self.get_instr(load_fn)) def test_fence(self): - self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3)) + self.gen_params = GenParams(test_core_config.replace(phys_regs_bits=3, rob_entries_bits=3, dcache_enable=False)) self.test_module = DummyLSUTestCircuit(self.gen_params) @def_method_mock(lambda: self.test_module.exception_report) @@ -495,3 +495,81 @@ def eff(): with self.run_simulation(self.test_module) as sim: sim.add_testbench(self.process) + + +class TestDummyLSUDCacheIntegration(TestCaseWithSimulator): 
+ def setup_method(self) -> None: + self.gen_params = GenParams( + test_core_config.replace( + phys_regs_bits=3, + rob_entries_bits=3, + dcache_enable=True, + dcache_ways=2, + dcache_sets_bits=2, + dcache_line_bytes_log=4, + ) + ) + self.test_module = DummyLSUTestCircuit(self.gen_params) + self.cp = self.gen_params.dcache_params + + def get_load_instr(self, addr: int, rob_id: int): + return { + "rp_dst": 1, + "rob_id": rob_id, + "exec_fn": {"op_type": OpType.LOAD, "funct3": Funct3.W, "funct7": 0}, + "s1_val": addr, + "s2_val": 0, + "imm": 0, + "pc": 0, + } + + async def respond_to_refill(self, sim: TestbenchContext, base_addr: int, words: list[int]): + for word_idx, word in enumerate(words): + req = await self.test_module.bus_master_adapter.request_read_mock.call(sim) + assert req["addr"] == (base_addr >> 2) + word_idx + assert req["sel"] == 0xF + await self.test_module.bus_master_adapter.get_read_response_mock.call(sim, data=word, err=0) + + async def process(self, sim: TestbenchContext): + base_addr = 0x00000100 + words = [0x01020304, 0x11121314, 0x21222324, 0x31323334] + + await self.test_module.issue.call(sim, self.get_load_instr(base_addr, rob_id=1)) + await self.respond_to_refill(sim, base_addr, words) + + first_result = await self.test_module.push_result.call(sim) + assert first_result["rob_id"] == 1 + assert first_result["result"] == words[0] + assert first_result["exception"] == 0 + + for word_idx in range(1, self.cp.words_in_line): + addr = base_addr + word_idx * self.cp.word_width_bytes + await self.test_module.issue.call(sim, self.get_load_instr(addr, rob_id=word_idx + 1)) + + for _ in range(4): + req = await self.test_module.bus_master_adapter.request_read_mock.call_try(sim) + assert req is None + await sim.tick() + + result = await self.test_module.push_result.call(sim) + assert result["rob_id"] == word_idx + 1 + assert result["result"] == words[word_idx] + assert result["exception"] == 0 + + def test_first_load_refills_and_following_loads_hit(self): 
+ @def_method_mock(lambda: self.test_module.exception_report) + def exception_consumer(arg): + @MethodMock.effect + def eff(): + assert False + + @def_method_mock(lambda: self.test_module.precommit, validate_arguments=lambda rob_id: True) + def precommiter(rob_id): + return {"side_fx": 1} + + @def_method_mock(lambda: self.test_module.core_state) + def core_state_process(): + return {"flushing": 0} + + with self.run_simulation(self.test_module) as sim: + sim.add_testbench(self.process) diff --git a/test/func_blocks/lsu/test_pma.py b/test/func_blocks/lsu/test_pma.py index 081bf1f51..feac174ae 100644 --- a/test/func_blocks/lsu/test_pma.py +++ b/test/func_blocks/lsu/test_pma.py @@ -35,7 +35,7 @@ def test_pma_direct(self): PMARegion(0x121, 0x130, False), ] - self.gen_params = GenParams(test_core_config.replace(pma=self.pma_regions)) + self.gen_params = GenParams(test_core_config.replace(pma=self.pma_regions, dcache_enable=False)) self.test_module = PMAChecker(self.gen_params) with self.run_simulation(self.test_module) as sim: @@ -125,7 +125,7 @@ def test_pma_indirect(self): PMARegion(0x10, 0x1F, False), PMARegion(0x20, 0x2F, True), ] - self.gen_params = GenParams(test_core_config.replace(pma=self.pma_regions)) + self.gen_params = GenParams(test_core_config.replace(pma=self.pma_regions, dcache_enable=False)) self.test_module = PMAIndirectTestCircuit(self.gen_params) self.precommit_enabled = False