Skip to content

Commit 8fb9b14

Browse files
committed
Merge remote-tracking branch 'origin/master' into tilk/fifo-rs
2 parents 20e0125 + f8add3c commit 8fb9b14

38 files changed

Lines changed: 1314 additions & 414 deletions

coreblocks/cache/icache.py

Lines changed: 63 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from coreblocks.interface.layouts import ICacheLayouts
1212
from transactron.utils import assign, OneHotSwitchDynamic
1313
from transactron.lib import *
14+
from transactron.lib import logging
1415
from coreblocks.peripherals.bus_adapter import BusMasterInterface
1516

1617
from coreblocks.cache.iface import CacheInterface, CacheRefillerInterface
@@ -21,19 +22,7 @@
2122
"ICacheBypass",
2223
]
2324

24-
25-
def extract_instr_from_word(m: TModule, params: ICacheParameters, word: Signal, addr: Value):
26-
instr_out = Signal(params.instr_width)
27-
if len(word) == 32:
28-
m.d.comb += instr_out.eq(word)
29-
elif len(word) == 64:
30-
with m.If(addr[2] == 0):
31-
m.d.comb += instr_out.eq(word[:32]) # Take lower 4 bytes
32-
with m.Else():
33-
m.d.comb += instr_out.eq(word[32:]) # Take upper 4 bytes
34-
else:
35-
raise RuntimeError("Word size different than 32 and 64 is not supported")
36-
return instr_out
25+
log = logging.HardwareLogger("frontend.icache")
3726

3827

3928
class ICacheBypass(Elaboratable, CacheInterface):
@@ -45,6 +34,9 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, bus_master:
4534
self.accept_res = Method(o=layouts.accept_res)
4635
self.flush = Method()
4736

37+
if params.words_in_fetch_block != 1:
38+
raise ValueError("ICacheBypass only supports fetch block size equal to the word size.")
39+
4840
def elaborate(self, platform):
4941
m = TModule()
5042

@@ -63,7 +55,7 @@ def _(addr: Value) -> None:
6355
def _():
6456
res = self.bus_master.get_read_response(m)
6557
return {
66-
"instr": extract_instr_from_word(m, self.params, res.data, req_addr),
58+
"fetch_block": res.data,
6759
"error": res.err,
6860
}
6961

@@ -82,10 +74,10 @@ class ICache(Elaboratable, CacheInterface):
8274
8375
Refilling a cache line is abstracted away from this module. The ICache module needs two methods
8476
from the refiller. `start_refill` is called whenever we need to refill a cache line.
85-
`refiller_accept` should be ready to be called whenever the refiller has another word ready
86-
to be written to cache. `refiller_accept` should set `last` bit when either an error occurs
87-
or the transfer is over. After issuing `last` bit, `refiller_accept` shouldn't be ready until
88-
the next transfer is started.
77+
`accept_refill` should be ready to be called whenever the refiller has another fetch block
78+
ready to be written to cache. `accept_refill` should set the `last` bit when either an error
79+
occurs or the transfer is over. After issuing the `last` bit, `accept_refill` shouldn't be ready
80+
until the next transfer is started.
8981
"""
9082

9183
def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: CacheRefillerInterface) -> None:
@@ -123,7 +115,7 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: C
123115
self.perf_misses = HwCounter("frontend.icache.misses")
124116
self.perf_errors = HwCounter("frontend.icache.fetch_errors")
125117
self.perf_flushes = HwCounter("frontend.icache.flushes")
126-
self.req_latency = LatencyMeasurer(
118+
self.req_latency = FIFOLatencyMeasurer(
127119
"frontend.icache.req_latency", "Latencies of cache requests", slots_number=2, max_latency=500
128120
)
129121

@@ -150,14 +142,13 @@ def elaborate(self, platform):
150142
]
151143

152144
m.submodules.mem = self.mem = ICacheMemory(self.params)
153-
m.submodules.req_fifo = self.req_fifo = FIFO(layout=self.addr_layout, depth=2)
154-
m.submodules.res_fwd = self.res_fwd = Forwarder(layout=self.layouts.accept_res)
145+
m.submodules.req_zipper = req_zipper = ArgumentsToResultsZipper(self.addr_layout, self.layouts.accept_res)
155146

156147
# State machine logic
157148
needs_refill = Signal()
158149
refill_finish = Signal()
159-
refill_finish_last = Signal()
160150
refill_error = Signal()
151+
refill_error_saved = Signal()
161152

162153
flush_start = Signal()
163154
flush_finish = Signal()
@@ -166,6 +157,7 @@ def elaborate(self, platform):
166157
self.perf_flushes.incr(m, cond=flush_finish)
167158

168159
with m.FSM(reset="FLUSH") as fsm:
160+
169161
with m.State("FLUSH"):
170162
with m.If(flush_finish):
171163
m.next = "LOOKUP"
@@ -188,49 +180,56 @@ def elaborate(self, platform):
188180
m.d.sync += way_selector.eq(way_selector.rotate_left(1))
189181

190182
# Fast path - read requests
191-
request_valid = self.req_fifo.read.ready
192-
request_addr = Signal(self.addr_layout)
183+
mem_read_addr = Signal(self.addr_layout)
184+
prev_mem_read_addr = Signal(self.addr_layout)
185+
m.d.comb += assign(mem_read_addr, prev_mem_read_addr)
193186

194-
tag_hit = [tag_data.valid & (tag_data.tag == request_addr.tag) for tag_data in self.mem.tag_rd_data]
195-
tag_hit_any = reduce(operator.or_, tag_hit)
187+
mem_read_output_valid = Signal()
188+
with Transaction(name="MemRead").body(
189+
m, request=fsm.ongoing("LOOKUP") & (mem_read_output_valid | refill_error_saved)
190+
):
191+
req_addr = req_zipper.peek_arg(m)
196192

197-
mem_out = Signal(self.params.word_width)
198-
for i in OneHotSwitchDynamic(m, Cat(tag_hit)):
199-
m.d.comb += mem_out.eq(self.mem.data_rd_data[i])
193+
tag_hit = [tag_data.valid & (tag_data.tag == req_addr.tag) for tag_data in self.mem.tag_rd_data]
194+
tag_hit_any = reduce(operator.or_, tag_hit)
200195

201-
instr_out = extract_instr_from_word(m, self.params, mem_out, Value.cast(request_addr))
196+
with m.If(tag_hit_any | refill_error_saved):
197+
self.perf_hits.incr(m, cond=tag_hit_any)
198+
mem_out = Signal(self.params.fetch_block_bytes * 8)
199+
for i in OneHotSwitchDynamic(m, Cat(tag_hit)):
200+
m.d.av_comb += mem_out.eq(self.mem.data_rd_data[i])
202201

203-
refill_error_saved = Signal()
204-
m.d.comb += needs_refill.eq(request_valid & ~tag_hit_any & ~refill_error_saved)
202+
req_zipper.write_results(m, fetch_block=mem_out, error=refill_error_saved)
203+
m.d.sync += refill_error_saved.eq(0)
204+
m.d.sync += mem_read_output_valid.eq(0)
205+
with m.Else():
206+
self.perf_misses.incr(m)
205207

206-
with Transaction().body(m, request=request_valid & fsm.ongoing("LOOKUP") & (tag_hit_any | refill_error_saved)):
207-
self.perf_errors.incr(m, cond=refill_error_saved)
208-
self.perf_misses.incr(m, cond=refill_finish_last)
209-
self.perf_hits.incr(m, cond=~refill_finish_last)
208+
m.d.comb += needs_refill.eq(1)
210209

211-
self.res_fwd.write(m, instr=instr_out, error=refill_error_saved)
212-
m.d.sync += refill_error_saved.eq(0)
210+
# Align to the beginning of the cache line
211+
aligned_addr = self.serialize_addr(req_addr) & ~((1 << self.params.offset_bits) - 1)
212+
log.debug(m, True, "Refilling line 0x{:x}", aligned_addr)
213+
self.refiller.start_refill(m, addr=aligned_addr)
213214

214215
@def_method(m, self.accept_res)
215216
def _():
216-
self.req_fifo.read(m)
217217
self.req_latency.stop(m)
218-
return self.res_fwd.read(m)
219218

220-
mem_read_addr = Signal(self.addr_layout)
221-
m.d.comb += assign(mem_read_addr, request_addr)
219+
output = req_zipper.read(m)
220+
return output.results
222221

223222
@def_method(m, self.issue_req, ready=accepting_requests)
224223
def _(addr: Value) -> None:
225224
self.perf_loads.incr(m)
226225
self.req_latency.start(m)
227226

228227
deserialized = self.deserialize_addr(addr)
229-
# Forward read address only if the method is called
230228
m.d.comb += assign(mem_read_addr, deserialized)
231-
m.d.sync += assign(request_addr, deserialized)
229+
m.d.sync += assign(prev_mem_read_addr, deserialized)
230+
req_zipper.write_args(m, deserialized)
232231

233-
self.req_fifo.write(m, deserialized)
232+
m.d.sync += mem_read_output_valid.eq(1)
234233

235234
m.d.comb += [
236235
self.mem.tag_rd_index.eq(mem_read_addr.index),
@@ -245,34 +244,30 @@ def _(addr: Value) -> None:
245244

246245
@def_method(m, self.flush, ready=accepting_requests)
247246
def _() -> None:
247+
log.info(m, True, "Flushing the cache...")
248248
m.d.sync += flush_index.eq(0)
249249
m.d.comb += flush_start.eq(1)
250250

251251
m.d.comb += flush_finish.eq(flush_index == self.params.num_of_sets - 1)
252252

253253
# Slow path - data refilling
254-
with Transaction().body(m, request=fsm.ongoing("LOOKUP") & needs_refill):
255-
# Align to the beginning of the cache line
256-
aligned_addr = self.serialize_addr(request_addr) & ~((1 << self.params.offset_bits) - 1)
257-
self.refiller.start_refill(m, addr=aligned_addr)
258-
259-
m.d.sync += refill_finish_last.eq(0)
260-
261254
with Transaction().body(m):
262255
ret = self.refiller.accept_refill(m)
263256
deserialized = self.deserialize_addr(ret.addr)
264257

258+
self.perf_errors.incr(m, cond=ret.error)
259+
265260
m.d.top_comb += [
266261
self.mem.data_wr_addr.index.eq(deserialized["index"]),
267262
self.mem.data_wr_addr.offset.eq(deserialized["offset"]),
268-
self.mem.data_wr_data.eq(ret.data),
263+
self.mem.data_wr_data.eq(ret.fetch_block),
269264
]
270265

271266
m.d.comb += self.mem.data_wr_en.eq(1)
272267
m.d.comb += refill_finish.eq(ret.last)
273-
m.d.sync += refill_finish_last.eq(1)
274268
m.d.comb += refill_error.eq(ret.error)
275-
m.d.sync += refill_error_saved.eq(ret.error)
269+
with m.If(ret.error):
270+
m.d.sync += refill_error_saved.eq(1)
276271

277272
with m.If(fsm.ongoing("FLUSH")):
278273
m.d.comb += [
@@ -285,9 +280,9 @@ def _() -> None:
285280
with m.Else():
286281
m.d.comb += [
287282
self.mem.way_wr_en.eq(way_selector),
288-
self.mem.tag_wr_index.eq(request_addr.index),
283+
self.mem.tag_wr_index.eq(mem_read_addr.index),
289284
self.mem.tag_wr_data.valid.eq(~refill_error),
290-
self.mem.tag_wr_data.tag.eq(request_addr.tag),
285+
self.mem.tag_wr_data.tag.eq(mem_read_addr.tag),
291286
self.mem.tag_wr_en.eq(refill_finish),
292287
]
293288

@@ -301,7 +296,7 @@ class ICacheMemory(Elaboratable):
301296
Writes are multiplexed using one-hot `way_wr_en` signal. Read data lines from all
302297
ways are separately exposed (as an array).
303298
304-
The data memory is addressed using a machine word.
299+
The data memory is addressed using fetch blocks.
305300
"""
306301

307302
def __init__(self, params: ICacheParameters) -> None:
@@ -319,11 +314,13 @@ def __init__(self, params: ICacheParameters) -> None:
319314

320315
self.data_addr_layout = make_layout(("index", self.params.index_bits), ("offset", self.params.offset_bits))
321316

317+
self.fetch_block_bits = params.fetch_block_bytes * 8
318+
322319
self.data_rd_addr = Signal(self.data_addr_layout)
323-
self.data_rd_data = Array([Signal(self.params.word_width) for _ in range(self.params.num_of_ways)])
320+
self.data_rd_data = Array([Signal(self.fetch_block_bits) for _ in range(self.params.num_of_ways)])
324321
self.data_wr_addr = Signal(self.data_addr_layout)
325322
self.data_wr_en = Signal()
326-
self.data_wr_data = Signal(self.params.word_width)
323+
self.data_wr_data = Signal(self.fetch_block_bits)
327324

328325
def elaborate(self, platform):
329326
m = TModule()
@@ -345,17 +342,18 @@ def elaborate(self, platform):
345342
tag_mem_wp.en.eq(self.tag_wr_en & way_wr),
346343
]
347344

348-
data_mem = Memory(width=self.params.word_width, depth=self.params.num_of_sets * self.params.words_in_block)
345+
data_mem = Memory(
346+
width=self.fetch_block_bits, depth=self.params.num_of_sets * self.params.fetch_blocks_in_line
347+
)
349348
data_mem_rp = data_mem.read_port()
350349
data_mem_wp = data_mem.write_port()
351350
m.submodules[f"data_mem_{i}_rp"] = data_mem_rp
352351
m.submodules[f"data_mem_{i}_wp"] = data_mem_wp
353352

354-
# We address the data RAM using machine words, so we have to
353+
# We address the data RAM using fetch blocks, so we have to
355354
# discard a few least significant bits from the address.
356-
redundant_offset_bits = exact_log2(self.params.word_width_bytes)
357-
rd_addr = Cat(self.data_rd_addr.offset, self.data_rd_addr.index)[redundant_offset_bits:]
358-
wr_addr = Cat(self.data_wr_addr.offset, self.data_wr_addr.index)[redundant_offset_bits:]
355+
rd_addr = Cat(self.data_rd_addr.offset, self.data_rd_addr.index)[self.params.fetch_block_bytes_log :]
356+
wr_addr = Cat(self.data_wr_addr.offset, self.data_wr_addr.index)[self.params.fetch_block_bytes_log :]
359357

360358
m.d.comb += [
361359
self.data_rd_data[i].eq(data_mem_rp.data),

coreblocks/cache/iface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class CacheRefillerInterface(HasElaborate, Protocol):
3535
start_refill : Method
3636
A method that is used to start a refill for a given cache line.
3737
accept_refill : Method
38-
A method that is used to accept one word from the requested cache line.
38+
A method that is used to accept one fetch block from the requested cache line.
3939
"""
4040

4141
start_refill: Method

0 commit comments

Comments
 (0)