1111from coreblocks .interface .layouts import ICacheLayouts
1212from transactron .utils import assign , OneHotSwitchDynamic
1313from transactron .lib import *
14+ from transactron .lib import logging
1415from coreblocks .peripherals .bus_adapter import BusMasterInterface
1516
1617from coreblocks .cache .iface import CacheInterface , CacheRefillerInterface
2122 "ICacheBypass" ,
2223]
2324
24-
25- def extract_instr_from_word (m : TModule , params : ICacheParameters , word : Signal , addr : Value ):
26- instr_out = Signal (params .instr_width )
27- if len (word ) == 32 :
28- m .d .comb += instr_out .eq (word )
29- elif len (word ) == 64 :
30- with m .If (addr [2 ] == 0 ):
31- m .d .comb += instr_out .eq (word [:32 ]) # Take lower 4 bytes
32- with m .Else ():
33- m .d .comb += instr_out .eq (word [32 :]) # Take upper 4 bytes
34- else :
35- raise RuntimeError ("Word size different than 32 and 64 is not supported" )
36- return instr_out
25+ log = logging .HardwareLogger ("frontend.icache" )
3726
3827
3928class ICacheBypass (Elaboratable , CacheInterface ):
@@ -45,6 +34,9 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, bus_master:
4534 self .accept_res = Method (o = layouts .accept_res )
4635 self .flush = Method ()
4736
37+ if params .words_in_fetch_block != 1 :
38+ raise ValueError ("ICacheBypass only supports fetch block size equal to the word size." )
39+
4840 def elaborate (self , platform ):
4941 m = TModule ()
5042
@@ -63,7 +55,7 @@ def _(addr: Value) -> None:
6355 def _ ():
6456 res = self .bus_master .get_read_response (m )
6557 return {
66- "instr " : extract_instr_from_word ( m , self . params , res .data , req_addr ) ,
58+ "fetch_block " : res .data ,
6759 "error" : res .err ,
6860 }
6961
@@ -82,10 +74,10 @@ class ICache(Elaboratable, CacheInterface):
8274
8375 Refilling a cache line is abstracted away from this module. The ICache module needs two methods
8476 from the refiller. `start_refill` is called whenever we need to refill a cache line.
85- `refiller_accept` should be ready to be called whenever the refiller has another word ready
86- to be written to cache. `refiller_accept` should set `last` bit when either an error occurs
87- or the transfer is over. After issuing `last` bit, `refiller_accept` shouldn't be ready until
88- the next transfer is started.
77+ `accept_refill` should be ready to be called whenever the refiller has another fetch block
78+ ready to be written to cache. `accept_refill` should set the `last` bit when either an error
79+ occurs or the transfer is over. After issuing the `last` bit, `accept_refill` shouldn't be ready
80+ until the next transfer is started.
8981 """
9082
9183 def __init__ (self , layouts : ICacheLayouts , params : ICacheParameters , refiller : CacheRefillerInterface ) -> None :
@@ -123,7 +115,7 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: C
123115 self .perf_misses = HwCounter ("frontend.icache.misses" )
124116 self .perf_errors = HwCounter ("frontend.icache.fetch_errors" )
125117 self .perf_flushes = HwCounter ("frontend.icache.flushes" )
126- self .req_latency = LatencyMeasurer (
118+ self .req_latency = FIFOLatencyMeasurer (
127119 "frontend.icache.req_latency" , "Latencies of cache requests" , slots_number = 2 , max_latency = 500
128120 )
129121
@@ -150,14 +142,13 @@ def elaborate(self, platform):
150142 ]
151143
152144 m .submodules .mem = self .mem = ICacheMemory (self .params )
153- m .submodules .req_fifo = self .req_fifo = FIFO (layout = self .addr_layout , depth = 2 )
154- m .submodules .res_fwd = self .res_fwd = Forwarder (layout = self .layouts .accept_res )
145+ m .submodules .req_zipper = req_zipper = ArgumentsToResultsZipper (self .addr_layout , self .layouts .accept_res )
155146
156147 # State machine logic
157148 needs_refill = Signal ()
158149 refill_finish = Signal ()
159- refill_finish_last = Signal ()
160150 refill_error = Signal ()
151+ refill_error_saved = Signal ()
161152
162153 flush_start = Signal ()
163154 flush_finish = Signal ()
@@ -166,6 +157,7 @@ def elaborate(self, platform):
166157 self .perf_flushes .incr (m , cond = flush_finish )
167158
168159 with m .FSM (reset = "FLUSH" ) as fsm :
160+
169161 with m .State ("FLUSH" ):
170162 with m .If (flush_finish ):
171163 m .next = "LOOKUP"
@@ -188,49 +180,56 @@ def elaborate(self, platform):
188180 m .d .sync += way_selector .eq (way_selector .rotate_left (1 ))
189181
190182 # Fast path - read requests
191- request_valid = self .req_fifo .read .ready
192- request_addr = Signal (self .addr_layout )
183+ mem_read_addr = Signal (self .addr_layout )
184+ prev_mem_read_addr = Signal (self .addr_layout )
185+ m .d .comb += assign (mem_read_addr , prev_mem_read_addr )
193186
194- tag_hit = [tag_data .valid & (tag_data .tag == request_addr .tag ) for tag_data in self .mem .tag_rd_data ]
195- tag_hit_any = reduce (operator .or_ , tag_hit )
187+ mem_read_output_valid = Signal ()
188+ with Transaction (name = "MemRead" ).body (
189+ m , request = fsm .ongoing ("LOOKUP" ) & (mem_read_output_valid | refill_error_saved )
190+ ):
191+ req_addr = req_zipper .peek_arg (m )
196192
197- mem_out = Signal (self .params .word_width )
198- for i in OneHotSwitchDynamic (m , Cat (tag_hit )):
199- m .d .comb += mem_out .eq (self .mem .data_rd_data [i ])
193+ tag_hit = [tag_data .valid & (tag_data .tag == req_addr .tag ) for tag_data in self .mem .tag_rd_data ]
194+ tag_hit_any = reduce (operator .or_ , tag_hit )
200195
201- instr_out = extract_instr_from_word (m , self .params , mem_out , Value .cast (request_addr ))
196+ with m .If (tag_hit_any | refill_error_saved ):
197+ self .perf_hits .incr (m , cond = tag_hit_any )
198+ mem_out = Signal (self .params .fetch_block_bytes * 8 )
199+ for i in OneHotSwitchDynamic (m , Cat (tag_hit )):
200+ m .d .av_comb += mem_out .eq (self .mem .data_rd_data [i ])
202201
203- refill_error_saved = Signal ()
204- m .d .comb += needs_refill .eq (request_valid & ~ tag_hit_any & ~ refill_error_saved )
202+ req_zipper .write_results (m , fetch_block = mem_out , error = refill_error_saved )
203+ m .d .sync += refill_error_saved .eq (0 )
204+ m .d .sync += mem_read_output_valid .eq (0 )
205+ with m .Else ():
206+ self .perf_misses .incr (m )
205207
206- with Transaction ().body (m , request = request_valid & fsm .ongoing ("LOOKUP" ) & (tag_hit_any | refill_error_saved )):
207- self .perf_errors .incr (m , cond = refill_error_saved )
208- self .perf_misses .incr (m , cond = refill_finish_last )
209- self .perf_hits .incr (m , cond = ~ refill_finish_last )
208+ m .d .comb += needs_refill .eq (1 )
210209
211- self .res_fwd .write (m , instr = instr_out , error = refill_error_saved )
212- m .d .sync += refill_error_saved .eq (0 )
210+ # Align to the beginning of the cache line
211+ aligned_addr = self .serialize_addr (req_addr ) & ~ ((1 << self .params .offset_bits ) - 1 )
212+ log .debug (m , True , "Refilling line 0x{:x}" , aligned_addr )
213+ self .refiller .start_refill (m , addr = aligned_addr )
213214
214215 @def_method (m , self .accept_res )
215216 def _ ():
216- self .req_fifo .read (m )
217217 self .req_latency .stop (m )
218- return self .res_fwd .read (m )
219218
220- mem_read_addr = Signal ( self . addr_layout )
221- m . d . comb += assign ( mem_read_addr , request_addr )
219+ output = req_zipper . read ( m )
220+ return output . results
222221
223222 @def_method (m , self .issue_req , ready = accepting_requests )
224223 def _ (addr : Value ) -> None :
225224 self .perf_loads .incr (m )
226225 self .req_latency .start (m )
227226
228227 deserialized = self .deserialize_addr (addr )
229- # Forward read address only if the method is called
230228 m .d .comb += assign (mem_read_addr , deserialized )
231- m .d .sync += assign (request_addr , deserialized )
229+ m .d .sync += assign (prev_mem_read_addr , deserialized )
230+ req_zipper .write_args (m , deserialized )
232231
233- self . req_fifo . write ( m , deserialized )
232+ m . d . sync += mem_read_output_valid . eq ( 1 )
234233
235234 m .d .comb += [
236235 self .mem .tag_rd_index .eq (mem_read_addr .index ),
@@ -245,34 +244,30 @@ def _(addr: Value) -> None:
245244
246245 @def_method (m , self .flush , ready = accepting_requests )
247246 def _ () -> None :
247+ log .info (m , True , "Flushing the cache..." )
248248 m .d .sync += flush_index .eq (0 )
249249 m .d .comb += flush_start .eq (1 )
250250
251251 m .d .comb += flush_finish .eq (flush_index == self .params .num_of_sets - 1 )
252252
253253 # Slow path - data refilling
254- with Transaction ().body (m , request = fsm .ongoing ("LOOKUP" ) & needs_refill ):
255- # Align to the beginning of the cache line
256- aligned_addr = self .serialize_addr (request_addr ) & ~ ((1 << self .params .offset_bits ) - 1 )
257- self .refiller .start_refill (m , addr = aligned_addr )
258-
259- m .d .sync += refill_finish_last .eq (0 )
260-
261254 with Transaction ().body (m ):
262255 ret = self .refiller .accept_refill (m )
263256 deserialized = self .deserialize_addr (ret .addr )
264257
258+ self .perf_errors .incr (m , cond = ret .error )
259+
265260 m .d .top_comb += [
266261 self .mem .data_wr_addr .index .eq (deserialized ["index" ]),
267262 self .mem .data_wr_addr .offset .eq (deserialized ["offset" ]),
268- self .mem .data_wr_data .eq (ret .data ),
263+ self .mem .data_wr_data .eq (ret .fetch_block ),
269264 ]
270265
271266 m .d .comb += self .mem .data_wr_en .eq (1 )
272267 m .d .comb += refill_finish .eq (ret .last )
273- m .d .sync += refill_finish_last .eq (1 )
274268 m .d .comb += refill_error .eq (ret .error )
275- m .d .sync += refill_error_saved .eq (ret .error )
269+ with m .If (ret .error ):
270+ m .d .sync += refill_error_saved .eq (1 )
276271
277272 with m .If (fsm .ongoing ("FLUSH" )):
278273 m .d .comb += [
@@ -285,9 +280,9 @@ def _() -> None:
285280 with m .Else ():
286281 m .d .comb += [
287282 self .mem .way_wr_en .eq (way_selector ),
288- self .mem .tag_wr_index .eq (request_addr .index ),
283+ self .mem .tag_wr_index .eq (mem_read_addr .index ),
289284 self .mem .tag_wr_data .valid .eq (~ refill_error ),
290- self .mem .tag_wr_data .tag .eq (request_addr .tag ),
285+ self .mem .tag_wr_data .tag .eq (mem_read_addr .tag ),
291286 self .mem .tag_wr_en .eq (refill_finish ),
292287 ]
293288
@@ -301,7 +296,7 @@ class ICacheMemory(Elaboratable):
301296 Writes are multiplexed using the one-hot `way_wr_en` signal. Read data lines from all
302297 ways are separately exposed (as an array).
303298
304- The data memory is addressed using a machine word .
299+ The data memory is addressed using fetch blocks.
305300 """
306301
307302 def __init__ (self , params : ICacheParameters ) -> None :
@@ -319,11 +314,13 @@ def __init__(self, params: ICacheParameters) -> None:
319314
320315 self .data_addr_layout = make_layout (("index" , self .params .index_bits ), ("offset" , self .params .offset_bits ))
321316
317+ self .fetch_block_bits = params .fetch_block_bytes * 8
318+
322319 self .data_rd_addr = Signal (self .data_addr_layout )
323- self .data_rd_data = Array ([Signal (self .params . word_width ) for _ in range (self .params .num_of_ways )])
320+ self .data_rd_data = Array ([Signal (self .fetch_block_bits ) for _ in range (self .params .num_of_ways )])
324321 self .data_wr_addr = Signal (self .data_addr_layout )
325322 self .data_wr_en = Signal ()
326- self .data_wr_data = Signal (self .params . word_width )
323+ self .data_wr_data = Signal (self .fetch_block_bits )
327324
328325 def elaborate (self , platform ):
329326 m = TModule ()
@@ -345,17 +342,18 @@ def elaborate(self, platform):
345342 tag_mem_wp .en .eq (self .tag_wr_en & way_wr ),
346343 ]
347344
348- data_mem = Memory (width = self .params .word_width , depth = self .params .num_of_sets * self .params .words_in_block )
345+ data_mem = Memory (
346+ width = self .fetch_block_bits , depth = self .params .num_of_sets * self .params .fetch_blocks_in_line
347+ )
349348 data_mem_rp = data_mem .read_port ()
350349 data_mem_wp = data_mem .write_port ()
351350 m .submodules [f"data_mem_{ i } _rp" ] = data_mem_rp
352351 m .submodules [f"data_mem_{ i } _wp" ] = data_mem_wp
353352
354- # We address the data RAM using machine words , so we have to
353+ # We address the data RAM using fetch blocks, so we have to
355354 # discard a few least significant bits from the address.
356- redundant_offset_bits = exact_log2 (self .params .word_width_bytes )
357- rd_addr = Cat (self .data_rd_addr .offset , self .data_rd_addr .index )[redundant_offset_bits :]
358- wr_addr = Cat (self .data_wr_addr .offset , self .data_wr_addr .index )[redundant_offset_bits :]
355+ rd_addr = Cat (self .data_rd_addr .offset , self .data_rd_addr .index )[self .params .fetch_block_bytes_log :]
356+ wr_addr = Cat (self .data_wr_addr .offset , self .data_wr_addr .index )[self .params .fetch_block_bytes_log :]
359357
360358 m .d .comb += [
361359 self .data_rd_data [i ].eq (data_mem_rp .data ),
0 commit comments