generic_register reset network optimization

This commit is contained in:
Blaise Tine
2020-11-29 18:41:36 -08:00
parent def6a35693
commit 5758ef9ebf
21 changed files with 84 additions and 48 deletions

View File

@@ -97,7 +97,8 @@ module VX_alu_unit #(
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33)
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -64,6 +64,7 @@ module VX_csr_io_arb #(
VX_generic_register #(
.N(1 + 32),
.R(1),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -99,7 +99,8 @@ module VX_csr_unit #(
wire stall_out = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -150,7 +150,8 @@ module VX_fpu_unit #(
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -76,7 +76,8 @@ module VX_gpu_unit #(
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE)
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -68,6 +68,7 @@ module VX_io_arb #(
VX_generic_register #(
.N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)),
.R(`NUM_THREADS),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -74,7 +74,8 @@ module VX_lsu_unit #(
wire stall_in;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
.R(1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
@@ -180,7 +181,8 @@ module VX_lsu_unit #(
wire arb_wb = is_store_req ? 0 : rsp_wb;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),

View File

@@ -63,6 +63,7 @@ module VX_mem_arb #(
VX_generic_register #(
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH),
.R(1),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -144,7 +144,8 @@ module VX_mul_unit #(
wire [`NUM_THREADS-1:0][31:0] result = mul_valid_out ? mul_result : div_result;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -47,7 +47,8 @@ module VX_opd_collect #(
wire stall_out = valid_out && ~ready_out;
VX_generic_register #(
.N(1+INSTW+OPDSW)
.N(1 + INSTW + OPDSW),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -238,7 +238,8 @@ module VX_warp_sched #(
assign scheduled_warp = schedule_valid && ~stall_out;
VX_generic_register #(
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
.N(1 + `NUM_THREADS + 32 + `NW_BITS),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -76,7 +76,8 @@ module VX_writeback #(
always @(*) assert(writeback_if.ready); // the writeback currently has no backpressure from issue stage
VX_generic_register #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),

View File

@@ -408,17 +408,18 @@ if (DRAM_ENABLE) begin
// we have a miss in msrq or in stage 3 for the current address
wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
|| (valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH),
.R(1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
.out ({is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
.in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
.out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
`ifdef DBG_CACHE_REQ_INFO
@@ -432,8 +433,8 @@ if (DRAM_ENABLE) begin
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
// we have a matching previous request that missed already
wire st2_pending_hazard_st1 = (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1);
wire st3_pending_hazard_st1 = (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1);
wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1);
wire st3_pending_hazard_st1 = valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1);
// force miss to ensure commit order when a new request has pending previous requests to same block
// also force a miss for msrq requests when previous requests got a miss
@@ -483,14 +484,15 @@ if (DRAM_ENABLE) begin
assign misses = miss_st1;
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH)
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
.out ({is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
.in ({valid_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
.out ({valid_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
);
end else begin
@@ -509,7 +511,6 @@ end else begin
assign inst_meta_st1= inst_meta_st0;
assign snp_inv_st1 = snp_inv_st0;
assign addr_st1 = addr_st0;
assign mem_byteen_st1 = 0;
assign dirty_st1 = 0;
assign readtag_st1 = 0;
assign miss_st1 = 0;
@@ -545,7 +546,7 @@ end
end
`endif
assign is_mshr_miss_st2 = (miss_st2 || force_miss_st2) && is_mshr_st2;
assign is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
VX_data_access #(
.BANK_ID (BANK_ID),
@@ -602,7 +603,7 @@ end
wire send_snp_rsp_st3;
wire incoming_fill_st3;
wire send_core_rsp_st2 = valid_st2 && !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2;
wire send_core_rsp_st2 = !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2;
// check if a matching fill request is coming
wire incoming_fill_dfp_st2 = dram_rsp_fire && (addr_st2 == dram_rsp_addr);
@@ -612,28 +613,29 @@ end
|| incoming_fill_st0_st2
|| incoming_fill_st1_st2;
wire send_fill_req_st2 = valid_st2 && miss_st2
wire send_fill_req_st2 = miss_st2
&& (!force_miss_st2
|| (is_mshr_st2 && addr_st2 != addr_st3))
&& !incoming_fill_st2;
wire do_writeback_st2 = valid_st2 && dirty_st2
wire do_writeback_st2 = dirty_st2
&& (is_fill_st2
|| (!force_miss_st2 && is_snp_st2));
wire send_dwb_req_st2 = send_fill_req_st2 || do_writeback_st2;
wire send_snp_rsp_st2 = valid_st2 && is_snp_st2 && !force_miss_st2;
wire send_snp_rsp_st2 = is_snp_st2 && !force_miss_st2;
VX_generic_register #(
.N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH),
.R(1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({is_mshr_st2, incoming_fill_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, force_miss_st2, is_snp_st2, snp_inv_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
.out ({is_mshr_st3, incoming_fill_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_inv_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
.in ({valid_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
.out ({valid_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
);
`ifdef DBG_CACHE_REQ_INFO
@@ -644,7 +646,7 @@ end
end
`endif
assign is_mshr_miss_st3 = (miss_st3 || force_miss_st3) && is_mshr_st3;
assign is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3);
// Enqueue to miss reserv if it's a valid miss
@@ -653,7 +655,7 @@ end
wire req_rw_st3;
wire[WORD_SIZE-1:0] req_byteen_st3;
wire mshr_push_unqual = miss_st3 || force_miss_st3;
wire mshr_push_unqual = valid_st3 && (miss_st3 || force_miss_st3);
assign mshr_push_stall = mshr_push_unqual && mshr_full;
wire mshr_push = mshr_push_unqual
@@ -672,7 +674,7 @@ end
// push missed requests as 'ready' if it was a forced miss but actually had a hit
// or the fill request is coming for the missed block
wire mshr_init_ready_state_st3 = !miss_st3 || incoming_fill_st3;
wire mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_st3);
VX_cache_miss_resrv #(
.BANK_ID (BANK_ID),
@@ -763,7 +765,7 @@ end
wire cwbq_empty, cwbq_full;
wire cwbq_push_unqual = send_core_rsp_st3 && !req_rw_st3;
wire cwbq_push_unqual = valid_st3 && send_core_rsp_st3 && !req_rw_st3;
assign cwbq_push_stall = cwbq_push_unqual && cwbq_full;
wire cwbq_push = cwbq_push_unqual
@@ -799,7 +801,7 @@ end
wire dwbq_empty, dwbq_full;
wire dwbq_push_unqual = send_dwb_req_st3;
wire dwbq_push_unqual = valid_st3 && send_dwb_req_st3;
assign dwbq_push_stall = dwbq_push_unqual && dwbq_full;
@@ -854,7 +856,7 @@ end
wire snpq_empty, snpq_full;
wire snpq_push_unqual = send_snp_rsp_st3;
wire snpq_push_unqual = valid_st3 && send_snp_rsp_st3;
assign snpq_push_stall = snpq_push_unqual && snpq_full;
@@ -922,7 +924,7 @@ end
`ifdef DBG_PRINT_CACHE_BANK
wire incoming_fill_dfp_st3 = dram_rsp_fire && (addr_st3 == dram_rsp_addr);
always @(posedge clk) begin
if (miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin
if (valid_st3 && miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin
$display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), incoming_fill_st3, incoming_fill_dfp_st3);
assert(!is_mshr_st3);
end

View File

@@ -93,6 +93,7 @@ module VX_cache_core_rsp_merge #(
VX_generic_register #(
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
.R(NUM_REQUESTS),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -48,6 +48,7 @@ module VX_cache_dram_req_arb #(
VX_generic_register #(
.N(1 + `DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
.R(1),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -104,8 +104,8 @@ module VX_data_access #(
end
assign write_enable = valid_in
&& writeen_in
&& !stall;
&& writeen_in
&& !stall;
assign dirtyb_out = read_dirtyb_out;
assign readdata_out = read_data;

View File

@@ -179,6 +179,7 @@ module VX_snp_forwarder #(
VX_generic_register #(
.N(1 + `LOG2UP(SNRQ_SIZE)),
.R(1),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -36,6 +36,7 @@ module VX_snp_rsp_arb #(
VX_generic_register #(
.N(1 + SNP_TAG_WIDTH),
.R(1),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),

View File

@@ -88,7 +88,8 @@ module VX_fp_noncomp #(
wire tmp_ab_equal = (dataa[i] == datab[i]) | (tmp_a_type[4] & tmp_b_type[4]);
VX_generic_register #(
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1)
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1),
.R(0)
) pipe_reg0 (
.clk (clk),
.reset (reset),
@@ -100,7 +101,8 @@ module VX_fp_noncomp #(
end
VX_generic_register #(
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32))
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
@@ -250,7 +252,8 @@ module VX_fp_noncomp #(
|| (op_type_r == `FPU_CMP); // CMP
VX_generic_register #(
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
.R(1)
) pipe_reg2 (
.clk (clk),
.reset (reset),

View File

@@ -54,7 +54,9 @@ module VX_cam_buffer #(
end else begin
for (integer i = 0; i < CPORTS; i++) begin
if (release_slot[i]) begin
assert(0 == free_slots[release_addr[i]]) else $error("%t: releasing invalid slot at port %d", $time, release_addr[i]);
assert(0 == free_slots[release_addr[i]]) else begin
$display("%t: releasing invalid slot at port %d", $time, release_addr[i]);
end
end
end
free_slots <= free_slots_n;

View File

@@ -1,7 +1,8 @@
`include "VX_platform.vh"
module VX_generic_register #(
parameter N = 1,
parameter N = 1,
parameter R = N,
parameter PASSTHRU = 0
) (
input wire clk,
@@ -17,13 +18,24 @@ module VX_generic_register #(
`UNUSED_VAR (stall)
assign out = flush ? N'(0) : in;
end else begin
reg [(N-1):0] value;
reg [N-1:0] value;
always @(posedge clk) begin
if (reset || flush) begin
value <= N'(0);
end else if (~stall) begin
value <= in;
if (R != 0) begin
always @(posedge clk) begin
if (~stall) begin
value <= in;
end
if (reset || flush) begin
value[N-1:N-R] <= R'(0);
end
end
end else begin
`UNUSED_VAR (reset)
`UNUSED_VAR (flush)
always @(posedge clk) begin
if (~stall) begin
value <= in;
end
end
end