fixed l2/l3 caches related bugs

This commit is contained in:
Blaise Tine
2021-01-09 16:32:55 -08:00
parent 5c83c594c1
commit 06945533cf
13 changed files with 354 additions and 274 deletions

231
hw/rtl/cache/VX_bank.v vendored
View File

@@ -84,12 +84,11 @@ module VX_bank #(
// DRAM response
input wire dram_rsp_valid,
input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr,
input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data,
output wire dram_rsp_ready
);
localparam MSHR_SIZE_BITS = $clog2(MSHR_SIZE+1);
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
wire [31:0] debug_pc_st0, debug_pc_st1, debug_pc_st01;
@@ -98,9 +97,9 @@ module VX_bank #(
`endif
wire drsq_pop;
wire drsq_empty, drsp_empty_next;
wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata;
wire drsq_empty, drsq_empty_next;
wire [`LINE_ADDR_WIDTH-1:0] drsq_addr_next;
wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_next;
wire drsq_push = dram_rsp_valid && dram_rsp_ready;
@@ -108,8 +107,8 @@ module VX_bank #(
wire drsq_full;
assign dram_rsp_ready = !drsq_full;
VX_input_queue #(
.DATAW ($bits(dram_rsp_data)),
VX_fifo_queue_xt #(
.DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
.SIZE (DRSQ_SIZE),
.FASTRAM (1)
) dram_rsp_queue (
@@ -117,29 +116,31 @@ module VX_bank #(
.reset (reset),
.push (drsq_push),
.pop (drsq_pop),
.data_in (dram_rsp_data),
.data_out(drsq_filldata),
.data_in ({dram_rsp_addr, dram_rsp_data}),
`UNUSED_PIN (data_out),
.empty (drsq_empty),
`UNUSED_PIN (data_out_next),
.empty_next(drsp_empty_next),
.data_out_next ({drsq_addr_next, drsq_filldata_next}),
.empty_next (drsq_empty_next),
.full (drsq_full),
`UNUSED_PIN (almost_full),
`UNUSED_PIN (size)
);
end else begin
`UNUSED_VAR (dram_rsp_valid)
`UNUSED_VAR (dram_rsp_addr)
`UNUSED_VAR (dram_rsp_data)
assign drsq_empty = 1;
assign drsp_empty_next = 1;
assign drsq_filldata = 0;
assign dram_rsp_ready = 0;
assign drsq_empty = 1;
assign drsq_empty_next = 1;
assign drsq_addr_next = 0;
assign drsq_filldata_next = 0;
assign dram_rsp_ready = 0;
end
wire creq_pop;
wire creq_full;
wire creq_empty;
wire [`REQS_BITS-1:0] creq_tid_next;
wire creq_full, creq_empty;
wire creq_rw_next;
wire [WORD_SIZE-1:0] creq_byteen_next;
wire [`REQS_BITS-1:0] creq_tid_next;
`IGNORE_WARNINGS_BEGIN
wire [`WORD_ADDR_WIDTH-1:0] creq_addr_next_unqual;
`IGNORE_WARNINGS_END
@@ -163,7 +164,7 @@ module VX_bank #(
assign creq_wsel_next = 0;
end
VX_input_queue #(
VX_fifo_queue_xt #(
.DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH),
.SIZE (CREQ_SIZE),
.FASTRAM (1)
@@ -173,14 +174,18 @@ module VX_bank #(
.push (creq_push),
.pop (creq_pop),
.data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}),
.data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}),
`UNUSED_PIN (empty_next),
`UNUSED_PIN (data_out),
.empty (creq_empty),
.data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}),
`UNUSED_PIN (empty_next),
.full (creq_full),
`UNUSED_PIN (almost_full),
`UNUSED_PIN (size)
);
wire mshr_pop;
wire mshr_almost_full;
wire mshr_pending_unqual_st0;
wire mshr_valid;
wire mshr_valid_next;
wire [`REQS_BITS-1:0] mshr_tid_next;
@@ -190,48 +195,26 @@ module VX_bank #(
wire [`REQ_TAG_WIDTH-1:0] mshr_tag_next;
wire mshr_rw_next;
wire [WORD_SIZE-1:0] mshr_byteen_next;
reg [`LINE_ADDR_WIDTH-1:0] creq_addr;
reg [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
reg [`REQ_TAG_WIDTH-1:0] creq_tag;
reg creq_mem_rw;
reg [WORD_SIZE-1:0] creq_byteen;
reg [`WORD_WIDTH-1:0] creq_writeword;
reg [`REQS_BITS-1:0] creq_tid;
always @(posedge clk) begin
creq_addr <= (mshr_valid_next || !drsp_empty_next) ? mshr_addr_next : creq_addr_next;
creq_wsel <= mshr_valid_next ? mshr_wsel_next : creq_wsel_next;
creq_mem_rw <= mshr_valid_next ? mshr_rw_next : creq_rw_next;
creq_byteen <= mshr_valid_next ? mshr_byteen_next : creq_byteen_next;
creq_writeword <= mshr_valid_next ? mshr_writeword_next : creq_writeword_next;
creq_tid <= mshr_valid_next ? mshr_tid_next : creq_tid_next;
creq_tag <= mshr_valid_next ? `REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next);
end
wire dreq_almost_full;
wire mshr_pop;
reg [MSHR_SIZE_BITS-1:0] mshr_pending_size;
wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n;
reg mshr_going_full;
wire mshr_pending_hazard_unqual_st0;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1;
wire mem_rw_st0, mem_rw_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1;
wire [`CACHE_LINE_WIDTH-1:0] filldata_st0, filldata_st1;
wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1;
wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1;
wire miss_st0, miss_st1;
wire force_miss_st0, force_miss_st1;
wire dirty_st0;
wire [CACHE_LINE_SIZE-1:0] dirtyb_st0, dirtyb_st1;
wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire mem_rw_st0, mem_rw_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire do_writeback_st0, do_writeback_st1;
wire writeen_unqual_st0, writeen_unqual_st1;
wire mshr_push_unqual_st0, mshr_push_unqual_st1;
@@ -254,44 +237,39 @@ module VX_bank #(
wire is_mshr_miss_st1 = valid_st1 && is_mshr_st1 && (miss_st1 || force_miss_st1);
wire creq_commit = valid_st1 && !is_fill_st1
&& (core_req_hit_st1 || (WRITE_THROUGH && mem_rw_st1))
&& !pipeline_stall;
// determine which queue to pop next in piority order
wire mshr_pop_unqual = mshr_valid;
wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty;
wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full;
wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_almost_full && !dreq_almost_full;
assign mshr_pop = mshr_pop_unqual && !pipeline_stall
&& !is_mshr_miss_st1; // stop if previous request was a miss
assign drsq_pop = drsq_pop_unqual && !pipeline_stall;
assign creq_pop = creq_pop_unqual && !pipeline_stall;
// MSHR pending size
assign mshr_pending_size_n = mshr_pending_size +
((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0));
always @(posedge clk) begin
if (reset) begin
mshr_pending_size <= 0;
mshr_going_full <= 0;
end else begin
mshr_pending_size <= mshr_pending_size_n;
mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE_BITS'(MSHR_SIZE));
end
end
assign valid_st0 = mshr_pop || drsq_pop || creq_pop;
assign is_mshr_st0 = mshr_pop_unqual;
assign is_fill_st0 = drsq_pop_unqual;
assign valid_st0 = mshr_pop || drsq_pop || creq_pop;
assign is_mshr_st0 = mshr_pop_unqual;
assign is_fill_st0 = drsq_pop_unqual;
assign addr_st0 = creq_addr;
assign wsel_st0 = creq_wsel;
assign mem_rw_st0 = creq_mem_rw;
assign byteen_st0 = creq_byteen;
assign writeword_st0 = creq_writeword;
assign req_tid_st0 = creq_tid;
assign tag_st0 = creq_tag;
assign filldata_st0 = drsq_filldata;
VX_pipe_register #(
.DATAW (`LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH + `CACHE_LINE_WIDTH),
.RESETW (0)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({
mshr_valid_next ? mshr_addr_next : (!drsq_empty_next ? drsq_addr_next : creq_addr_next),
mshr_valid_next ? mshr_wsel_next : creq_wsel_next,
mshr_valid_next ? mshr_rw_next : creq_rw_next,
mshr_valid_next ? mshr_byteen_next : creq_byteen_next,
mshr_valid_next ? mshr_writeword_next : creq_writeword_next,
mshr_valid_next ? mshr_tid_next : creq_tid_next,
mshr_valid_next ? `REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next),
drsq_filldata_next
}),
.data_out ({addr_st0, wsel_st0, mem_rw_st0, byteen_st0, writeword_st0, req_tid_st0, tag_st0, filldata_st0})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
@@ -347,12 +325,12 @@ if (DRAM_ENABLE) begin
// redundant fills
wire is_redundant_fill = is_fill_st0 && !miss_st0;
// we have a miss in mshr for the current address
wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0
|| (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1));
// we have a miss in mshr or going to it for the current address
wire mshr_pending_st0 = mshr_pending_unqual_st0
|| (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1));
// force miss to ensure commit order when a new request has pending previous requests to same block
assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0;
assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0;
assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0)
|| (is_fill_st0 && !is_redundant_fill);
@@ -369,7 +347,7 @@ if (DRAM_ENABLE) begin
end else begin
`UNUSED_VAR (mshr_pending_hazard_unqual_st0)
`UNUSED_VAR (mshr_pending_unqual_st0)
`UNUSED_VAR (drsq_push)
`UNUSED_VAR (dirty_st0)
`UNUSED_VAR (writeen_st1)
@@ -400,7 +378,7 @@ end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
.RESETW (1)
) pipe_reg (
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!pipeline_stall),
@@ -482,7 +460,7 @@ end
&& !crsq_push_stall
&& !dreq_push_stall;
wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == addr_st0));
wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0);
if (DRAM_ENABLE) begin
@@ -501,6 +479,7 @@ end
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
@@ -518,21 +497,22 @@ end
.enqueue_addr (addr_st1),
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
.enqueue_is_mshr (is_mshr_st1),
.enqueue_ready (mshr_init_ready_state_st1),
.enqueue_as_ready (mshr_init_ready_state_st1),
.enqueue_almfull (mshr_almost_full),
// lookup
.lookup_ready (drsq_pop),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_hazard_unqual_st0),
.lookup_match (mshr_pending_unqual_st0),
// schedule
.schedule (mshr_pop),
.schedule_valid (mshr_valid),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
`UNUSED_PIN (schedule_addr),
`UNUSED_PIN (schedule_data),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
// dequeue
.dequeue (mshr_dequeue_st1)
@@ -545,15 +525,16 @@ end
`UNUSED_VAR (mem_rw_st1)
`UNUSED_VAR (byteen_st1)
`UNUSED_VAR (incoming_fill_st1)
assign mshr_pending_hazard_unqual_st0 = 0;
assign mshr_valid = 0;
assign mshr_valid_next = 0;
assign mshr_addr_next = 0;
assign mshr_wsel_next = 0;
assign mshr_almost_full = 0;
assign mshr_pending_unqual_st0 = 0;
assign mshr_valid = 0;
assign mshr_valid_next = 0;
assign mshr_addr_next = 0;
assign mshr_wsel_next = 0;
assign mshr_writeword_next = 0;
assign mshr_tid_next = 0;
assign mshr_tag_next = 0;
assign mshr_rw_next = 0;
assign mshr_tid_next = 0;
assign mshr_tag_next = 0;
assign mshr_rw_next = 0;
assign mshr_byteen_next = 0;
end
@@ -607,13 +588,13 @@ end
// Enqueue DRAM request
wire dreq_empty, dreq_full;
wire dreq_empty;
wire dreq_push_unqual = valid_st1 && dreq_push_st1;
assign dreq_push_stall = dreq_push_unqual && dreq_full;
assign dreq_push_stall = 0;
wire dreq_push = dreq_push_unqual
&& !dreq_full
wire dreq_push = dreq_push_unqual
&& (do_writeback_st1 || !incoming_fill_st1)
&& !mshr_push_stall
&& !crsq_push_stall;
@@ -645,16 +626,11 @@ end
assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}};
if (DRAM_ENABLE) begin
always @(posedge clk) begin
assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1))
else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
end
VX_fifo_queue #(
if (DRAM_ENABLE) begin
VX_fifo_queue_xt #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DREQ_SIZE),
.BUFFERED (1),
.ALM_FULL (DREQ_SIZE-1),
.FASTRAM (1)
) dram_req_queue (
.clk (clk),
@@ -664,7 +640,10 @@ end
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.full (dreq_full),
.almost_full (dreq_almost_full),
`UNUSED_PIN (full),
`UNUSED_PIN (data_out_next),
`UNUSED_PIN (empty_next),
`UNUSED_PIN (size)
);
end else begin
@@ -678,9 +657,9 @@ end
`UNUSED_VAR (readdata_st1)
`UNUSED_VAR (writeback)
`UNUSED_VAR (dram_req_ready)
assign dreq_empty = 1;
assign dreq_full = 0;
assign dram_req_rw = 0;
assign dreq_empty = 1;
assign dreq_almost_full = 0;
assign dram_req_rw = 0;
assign dram_req_byteen = 0;
assign dram_req_addr = 0;
assign dram_req_data = 0;
@@ -689,9 +668,7 @@ end
assign dram_req_valid = !dreq_empty;
// bank pipeline stall
assign pipeline_stall = mshr_push_stall
|| crsq_push_stall
|| dreq_push_stall;
assign pipeline_stall = crsq_push_stall;
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
@@ -708,23 +685,27 @@ end
`ifdef PERF_ENABLE
assign perf_read_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && !mem_rw_st1;
assign perf_write_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && mem_rw_st1;
assign perf_mshr_stalls = mshr_going_full;
assign perf_pipe_stalls = pipeline_stall || mshr_going_full;
assign perf_pipe_stalls = pipeline_stall || mshr_almost_full || dreq_going_full;
assign perf_mshr_stalls = mshr_almost_full;
`endif
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin
$display("%t: miss with incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
assert(!is_mshr_st1);
end
if (pipeline_stall) begin
$display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall);
end
if (drsq_pop) begin
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata);
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), filldata_st0);
end
if (creq_pop || mshr_pop) begin
if (creq_mem_rw)
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, creq_tag, creq_tid, creq_byteen, creq_writeword, debug_wid_st0, debug_pc_st0);
if (mem_rw_st0)
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, writeword_st0, debug_wid_st0, debug_pc_st0);
else
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, creq_tag, creq_tid, creq_byteen, debug_wid_st0, debug_pc_st0);
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, debug_wid_st0, debug_pc_st0);
end
if (crsq_push) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag_st1, crsq_tid_st1, crsq_data_st1, debug_wid_st1, debug_pc_st1);