Fixed L3 cache hang by using a memory arbiter in the AFU

This commit is contained in:
Blaise Tine
2020-11-15 06:36:32 -08:00
parent 2e0f51af80
commit 5d58bf3d11
20 changed files with 514 additions and 388 deletions

View File

@@ -553,42 +553,42 @@ module VX_cluster #(
VX_mem_arb #(
.NUM_REQUESTS (`L2NUM_REQUESTS),
.WORD_SIZE (`L2BANK_LINE_SIZE),
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`DDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH)
) dram_arb (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// Core request
.mem_req_valid_in (core_dram_req_valid),
.mem_req_rw_in (core_dram_req_rw),
.mem_req_byteen_in (core_dram_req_byteen),
.mem_req_addr_in (core_dram_req_addr),
.mem_req_data_in (core_dram_req_data),
.mem_req_tag_in (core_dram_req_tag),
.mem_req_ready_in (core_dram_req_ready),
.req_valid_in (core_dram_req_valid),
.req_rw_in (core_dram_req_rw),
.req_byteen_in (core_dram_req_byteen),
.req_addr_in (core_dram_req_addr),
.req_data_in (core_dram_req_data),
.req_tag_in (core_dram_req_tag),
.req_ready_in (core_dram_req_ready),
// Core response
.mem_rsp_valid_in (core_dram_rsp_valid),
.mem_rsp_data_in (core_dram_rsp_data),
.mem_rsp_tag_in (core_dram_rsp_tag),
.mem_rsp_ready_in (core_dram_rsp_ready),
.rsp_valid_out (core_dram_rsp_valid),
.rsp_data_out (core_dram_rsp_data),
.rsp_tag_out (core_dram_rsp_tag),
.rsp_ready_out (core_dram_rsp_ready),
// DRAM request
.mem_req_valid_out (dram_req_valid),
.mem_req_rw_out (dram_req_rw),
.mem_req_byteen_out (dram_req_byteen),
.mem_req_addr_out (dram_req_addr),
.mem_req_data_out (dram_req_data),
.mem_req_tag_out (dram_req_tag),
.mem_req_ready_out (dram_req_ready),
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// DRAM response
.mem_rsp_valid_out (dram_rsp_valid),
.mem_rsp_tag_out (dram_rsp_tag),
.mem_rsp_data_out (dram_rsp_data),
.mem_rsp_ready_out (dram_rsp_ready)
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
);
end

View File

@@ -234,10 +234,10 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CORE_REQ_INFO // pc, rd, wid
`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`else
`define DBG_CORE_REQ_MDATAW 0
`define DBG_CACHE_REQ_MDATAW 0
`endif
////////////////////////// Dcache Configurable Knobs //////////////////////////
@@ -249,7 +249,7 @@
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits
`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
@@ -287,7 +287,7 @@
`define ICORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)

View File

@@ -39,10 +39,6 @@ module VX_gpr_stage #(
always @(posedge clk) begin
if (reset) begin
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rs1_is_zero <= 0;
rs2_is_zero <= 0;
end else begin
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;

View File

@@ -45,7 +45,7 @@ module VX_icache_stage #(
// Can accept new request?
assign ifetch_req_if.ready = icache_req_if.ready;
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
`else
assign icache_req_if.tag = req_tag;

View File

@@ -144,7 +144,7 @@ module VX_lsu_unit #(
assign dcache_req_if.addr = req_addr;
assign dcache_req_if.data = req_data;
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag};
`else
assign dcache_req_if.tag = req_tag;

View File

@@ -2,46 +2,46 @@
module VX_mem_arb #(
parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1,
parameter DATA_WIDTH = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQUESTS)
) (
input wire clk,
input wire reset,
// input requests
input wire [NUM_REQUESTS-1:0] mem_req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in,
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [NUM_REQUESTS-1:0] mem_req_rw_in,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in,
output wire [NUM_REQUESTS-1:0] mem_req_ready_in,
input wire [NUM_REQUESTS-1:0] req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] req_addr_in,
input wire [NUM_REQUESTS-1:0] req_rw_in,
input wire [NUM_REQUESTS-1:0][DATA_SIZE-1:0] req_byteen_in,
input wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] req_data_in,
output wire [NUM_REQUESTS-1:0] req_ready_in,
// input response
output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in,
input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in,
output wire [NUM_REQUESTS-1:0] rsp_valid_out,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
output wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQUESTS-1:0] rsp_ready_out,
// output request
output wire mem_req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out,
output wire [ADDR_WIDTH-1:0] mem_req_addr_out,
output wire mem_req_rw_out,
output wire [WORD_SIZE-1:0] mem_req_byteen_out,
output wire [WORD_WIDTH-1:0] mem_req_data_out,
input wire mem_req_ready_out,
output wire req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
output wire [ADDR_WIDTH-1:0] req_addr_out,
output wire req_rw_out,
output wire [DATA_SIZE-1:0] req_byteen_out,
output wire [DATA_WIDTH-1:0] req_data_out,
input wire req_ready_out,
// output response
input wire mem_rsp_valid_out,
input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out,
input wire [WORD_WIDTH-1:0] mem_rsp_data_out,
output wire mem_rsp_ready_out
input wire rsp_valid_in,
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
input wire [DATA_WIDTH-1:0] rsp_data_in,
output wire rsp_ready_in
);
if (NUM_REQUESTS > 1) begin
@@ -53,59 +53,59 @@ module VX_mem_arb #(
) req_arb (
.clk (clk),
.reset (reset),
.requests (mem_req_valid_in),
.requests (req_valid_in),
`UNUSED_PIN (grant_valid),
.grant_index (req_idx),
.grant_onehot (req_1hot)
);
wire stall = ~mem_req_ready_out && mem_req_valid_out;
wire stall = ~req_ready_out && req_valid_out;
VX_generic_register #(
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH),
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}),
.out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out})
.in ({req_valid_in[req_idx], {req_tag_in[req_idx], REQS_BITS'(req_idx)}, req_addr_in[req_idx], req_rw_in[req_idx], req_byteen_in[req_idx], req_data_in[req_idx]}),
.out ({req_valid_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out})
);
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_req_ready_in[i] = req_1hot[i] && ~stall;
assign req_ready_in[i] = req_1hot[i] && ~stall;
end
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0];
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i));
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign mem_rsp_data_in[i] = mem_rsp_data_out;
assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i));
assign rsp_tag_out[i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH];
assign rsp_data_out[i] = rsp_data_in;
end
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel];
assign rsp_ready_in = rsp_ready_out[rsp_sel];
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign mem_req_valid_out = mem_req_valid_in;
assign mem_req_tag_out = mem_req_tag_in;
assign mem_req_addr_out = mem_req_addr_in;
assign mem_req_rw_out = mem_req_rw_in;
assign mem_req_byteen_out = mem_req_byteen_in;
assign mem_req_data_out = mem_req_data_in;
assign mem_req_ready_in = mem_req_ready_out;
assign req_valid_out = req_valid_in;
assign req_tag_out = req_tag_in;
assign req_addr_out = req_addr_in;
assign req_rw_out = req_rw_in;
assign req_byteen_out = req_byteen_in;
assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out;
assign mem_rsp_valid_in = mem_rsp_valid_out;
assign mem_rsp_tag_in = mem_rsp_tag_out;
assign mem_rsp_data_in = mem_rsp_data_out;
assign mem_rsp_ready_out = mem_rsp_ready_in;
assign rsp_valid_out = rsp_valid_in;
assign rsp_tag_out = rsp_tag_in;
assign rsp_data_out = rsp_data_in;
assign rsp_ready_in = rsp_ready_out;
end

View File

@@ -64,23 +64,27 @@ module VX_scoreboard #(
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end
end
`endif
reg [31:0] stall_ctr;
always @(posedge clk) begin
if (reset) begin
stall_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
stall_ctr <= stall_ctr + 1;
assert(stall_ctr < 100000) else $error("%t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
stall_ctr <= stall_ctr + 1;
if (stall_ctr >= 2000) begin
$fflush();
assert(0);
end
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0;
end
end
`endif
end
endmodule

View File

@@ -100,7 +100,7 @@ module VX_bank #(
output wire misses
);
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_pc_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
@@ -352,7 +352,7 @@ module VX_bank #(
wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
end else begin
@@ -371,7 +371,7 @@ module VX_bank #(
.out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin
@@ -420,7 +420,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1),
.debug_rd (debug_rd_st1),
.debug_wid (debug_wid_st1),
@@ -474,7 +474,7 @@ module VX_bank #(
.out ({is_msrq_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
);
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end else begin
@@ -498,7 +498,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st2),
.debug_rd (debug_rd_st2),
.debug_wid (debug_wid_st2),
@@ -562,7 +562,7 @@ module VX_bank #(
.out ({is_msrq_st3, send_core_rsp_st3, send_fill_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
);
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
end else begin
@@ -623,7 +623,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
.debug_pc_st0 (debug_pc_st0),
.debug_rd_st0 (debug_rd_st0),
.debug_wid_st0 (debug_wid_st0),

View File

@@ -3,7 +3,7 @@
`include "VX_platform.vh"
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
`include "VX_define.vh"
`endif

View File

@@ -24,7 +24,7 @@ module VX_cache_miss_resrv #(
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_st0,
input wire[`NR_BITS-1:0] debug_rd_st0,

View File

@@ -25,7 +25,7 @@ module VX_data_access #(
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd,

View File

@@ -25,7 +25,7 @@ module VX_tag_access #(
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd,
@@ -122,7 +122,7 @@ module VX_tag_access #(
assign readtag_out = use_read_tag;
assign writeen_out = (use_do_write || use_do_fill);
`ifdef DBG_PRINT_CACHE_DATA
`ifdef DBG_PRINT_CACHE_TAG
always @(posedge clk) begin
if (valid_in && !stall) begin
if (use_do_fill && tags_match) begin