pipeline refactoring - fmax >= 222 mhz

This commit is contained in:
Blaise Tine
2020-08-14 21:50:14 -07:00
parent 71a46d04b9
commit 6c12391338
107 changed files with 1392 additions and 1239 deletions

View File

@@ -108,7 +108,7 @@ module VX_bank #(
wire[31:0] debug_pc_st0;
wire debug_wb_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire[`NW_BITS-1:0] debug_wid_st0;
wire debug_rw_st0;
wire[WORD_SIZE-1:0] debug_byteen_st0;
wire[`REQS_BITS-1:0] debug_tid_st0;
@@ -117,7 +117,7 @@ module VX_bank #(
wire[31:0] debug_pc_st1e;
wire debug_wb_st1e;
wire[`NR_BITS-1:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire[`NW_BITS-1:0] debug_wid_st1e;
wire debug_rw_st1e;
wire[WORD_SIZE-1:0] debug_byteen_st1e;
wire[`REQS_BITS-1:0] debug_tid_st1e;
@@ -126,7 +126,7 @@ module VX_bank #(
wire[31:0] debug_pc_st2;
wire debug_wb_st2;
wire[`NR_BITS-1:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire[`NW_BITS-1:0] debug_wid_st2;
wire debug_rw_st2;
wire[WORD_SIZE-1:0] debug_byteen_st2;
wire[`REQS_BITS-1:0] debug_tid_st2;
@@ -271,10 +271,9 @@ module VX_bank #(
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
`DEBUG_END
integer j;
always @(*) begin
is_fill_in_pipe = 0;
for (j = 0; j < STAGE_1_CYCLES; j++) begin
for (integer j = 0; j < STAGE_1_CYCLES; j++) begin
if (is_fill_st1[j]) begin
is_fill_in_pipe = 1;
end
@@ -360,7 +359,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end
`endif
@@ -375,8 +374,7 @@ module VX_bank #(
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES; i++) begin
for (genvar i = 1; i < STAGE_1_CYCLES; i++) begin
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
) s0_1_cc (
@@ -446,13 +444,13 @@ module VX_bank #(
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st1e(debug_pc_st1e),
.debug_wb_st1e(debug_wb_st1e),
.debug_rd_st1e(debug_rd_st1e),
.debug_warp_num_st1e(debug_warp_num_st1e),
.debug_wid_st1e(debug_wid_st1e),
.debug_tagid_st1e(debug_tagid_st1e),
`endif
`endif
.stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe),
@@ -490,7 +488,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
end
`endif
@@ -531,7 +529,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end
`endif
@@ -543,10 +541,10 @@ module VX_bank #(
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
wire miss_add = miss_add_unqual
&& !mrvq_full
&& !(cwbq_push_stall
|| dwbq_push_stall
|| dram_fill_req_stall);
&& !mrvq_full
&& !(cwbq_push_stall
|| dwbq_push_stall
|| dram_fill_req_stall);
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
@@ -718,7 +716,9 @@ module VX_bank #(
always @(posedge clk) begin
if (reset) begin
dwbq_dual_valid_sel <= 0;
end else if (dwbq_is_dwb_out && dwbq_is_snp_out && (dram_wb_req_fire || snp_rsp_fire)) begin
end else if (dwbq_is_dwb_out
&& dwbq_is_snp_out
&& (dram_wb_req_fire || snp_rsp_fire)) begin
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
end
end

View File

@@ -132,12 +132,12 @@ module VX_cache #(
wire[31:0] debug_core_req_use_pc;
wire debug_core_req_wb;
wire[`NR_BITS-1:0] debug_core_req_rd;
wire[`NW_BITS-1:0] debug_core_req_warp_num;
wire[`NW_BITS-1:0] debug_core_req_wid;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
/* verilator lint_on UNUSED */
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
end
`endif
@@ -246,10 +246,8 @@ module VX_cache #(
assign dram_req_tag = dram_req_addr;
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
genvar i;
for (i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;

View File

@@ -18,12 +18,10 @@ module VX_cache_core_req_bank_sel #(
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready
);
integer i;
if (NUM_BANKS == 1) begin
always @(*) begin
per_bank_valid = 0;
for (i = 0; i < NUM_REQUESTS; i++) begin
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[0][i] = core_req_valid[i];
end
end
@@ -33,7 +31,7 @@ module VX_cache_core_req_bank_sel #(
always @(*) begin
per_bank_valid = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}};
for (i = 0; i < NUM_REQUESTS; i++) begin
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
end

View File

@@ -48,14 +48,12 @@ module VX_cache_core_rsp_merge #(
wire stall = ~core_rsp_ready && (| core_rsp_valid);
integer i;
if (CORE_TAG_ID_BITS != 0) begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
for (i = 0; i < NUM_BANKS; i++) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
@@ -71,7 +69,7 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = 0;
for (i = 0; i < NUM_BANKS; i++) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `BANK_BITS'(i))

View File

@@ -106,8 +106,7 @@ module VX_cache_dram_req_arb #(
`UNUSED_PIN (grant_onehot)
);
genvar i;
for (i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
end

View File

@@ -77,9 +77,8 @@ module VX_cache_miss_resrv #(
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
reg [MRVQ_SIZE-1:0] valid_address_match;
genvar i;
for (i = 0; i < MRVQ_SIZE; i++) begin
for (genvar i = 0; i < MRVQ_SIZE; i++) begin
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
end
@@ -121,7 +120,6 @@ module VX_cache_miss_resrv #(
head_ptr <= 0;
tail_ptr <= 0;
end else begin
if (mrvq_push) begin
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
@@ -157,11 +155,10 @@ module VX_cache_miss_resrv #(
end
`ifdef DBG_PRINT_CACHE_MSRQ
integer j;
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
for (integer j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");

View File

@@ -83,9 +83,7 @@ module VX_snp_forwarder #(
end
end
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
@@ -110,7 +108,7 @@ module VX_snp_forwarder #(
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
end

View File

@@ -34,8 +34,7 @@ module VX_snp_rsp_arb #(
assign snp_rsp_valid = fsq_valid;
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
genvar i;
for (i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
end

View File

@@ -30,7 +30,7 @@ module VX_tag_data_access #(
input wire[31:0] debug_pc_st1e,
input wire debug_wb_st1e,
input wire[`NR_BITS-1:0] debug_rd_st1e,
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
input wire[`NW_BITS-1:0] debug_wid_st1e,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
`IGNORE_WARNINGS_END
`endif
@@ -135,8 +135,7 @@ module VX_tag_data_access #(
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
for (genvar i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #(
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
@@ -157,11 +156,11 @@ module VX_tag_data_access #(
if (`WORD_SELECT_WIDTH != 0) begin
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
for (i = 0; i < WORD_SIZE; i++) begin
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end else begin
for (i = 0; i < WORD_SIZE; i++) begin
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end
@@ -176,7 +175,7 @@ module VX_tag_data_access #(
&& ~is_snp_st1e
&& ~real_writefill;
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
&& should_write;
@@ -218,15 +217,15 @@ module VX_tag_data_access #(
if (valid_req_st1e) begin
if ((| use_write_enable)) begin
if (writefill_st1e) begin
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
$display("%t: bank%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
end else begin
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
$display("%t: bank%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
end
end else
if (miss_st1e) begin
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
$display("%t: bank%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
end else begin
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
$display("%t: bank%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
end
end
end