diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 68af5f73..e4803572 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -67,7 +67,7 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_busy; wire [`NUM_CORES-1:0] per_core_ebreak; - for (genvar i = 0; i < `NUM_CORES; i++) begin + for (genvar i = 0; i < `NUM_CORES; i++) begin VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( diff --git a/hw/rtl/VX_gpr_ram_f.v b/hw/rtl/VX_gpr_ram_f.v index 3b800993..68c2a69f 100644 --- a/hw/rtl/VX_gpr_ram_f.v +++ b/hw/rtl/VX_gpr_ram_f.v @@ -20,6 +20,8 @@ module VX_gpr_ram_f #( ); reg [DATAW-1:0] mem [DEPTH-1:0]; + initial mem = '{default: 0}; + always @(posedge clk) begin if (wren) begin mem [waddr] <= wdata; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 39e81b2a..ce4783e1 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -16,36 +16,14 @@ module VX_gpr_stage #( `UNUSED_VAR (reset) `ifdef EXT_F_ENABLE - localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2); - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f; + localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3; - - wire waddr_is_fp = writeback_if.rd[`NR_BITS-1]; - wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1]; - wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1]; - wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1]; - `UNUSED_VAR (raddr3_is_fp) - assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; - assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - VX_gpr_ram_i #( - .DATAW (32), - .DEPTH (RAM_DEPTH) - ) gpr_ram_i ( - .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp), - .waddr (waddr), - .wdata 
(writeback_if.data[i]), - .raddr1 (raddr1), - .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) - ); - end + assign waddr = {writeback_if.wid, writeback_if.rd}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; for (genvar i = 0; i < `NUM_THREADS; i++) begin VX_gpr_ram_f #( @@ -53,29 +31,29 @@ module VX_gpr_stage #( .DEPTH (RAM_DEPTH) ) gpr_ram_f ( .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp), + .wren (writeback_if.valid && writeback_if.tmask[i]), .waddr (waddr), .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), .raddr3 (raddr3), - .rdata1 (rdata1_f[i]), - .rdata2 (rdata2_f[i]), - .rdata3 (rdata3_f[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]), + .rdata3 (rdata3[i]) ); end - assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i; - assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i; - assign gpr_rsp_if.rs3_data = rdata3_f; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; + assign gpr_rsp_if.rs3_data = rdata3; `else localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2; assign waddr = {writeback_if.wid, writeback_if.rd}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; `UNUSED_VAR (gpr_req_if.rs3) for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -89,13 +67,13 @@ module VX_gpr_stage #( .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]) ); end - assign gpr_rsp_if.rs1_data = rdata1_i; - assign gpr_rsp_if.rs2_data = rdata2_i; + assign 
gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; assign gpr_rsp_if.rs3_data = 0; `endif diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 776ab52a..5770eb4a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -89,6 +89,8 @@ module VX_bank #( output wire dram_rsp_ready ); + localparam MSHR_SIZE_BITS = $clog2(MSHR_SIZE+1); + `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; @@ -186,8 +188,8 @@ module VX_bank #( ); wire mshr_pop; - reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; - wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n; + reg [MSHR_SIZE_BITS-1:0] mshr_pending_size; + wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n; reg mshr_going_full; wire mshr_valid_st0; @@ -278,7 +280,7 @@ module VX_bank #( wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); - wire creq_commit = valid_st2 + wire creq_commit = valid_st2 && !is_fill_st2 && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) && !pipeline_stall; @@ -301,7 +303,7 @@ module VX_bank #( mshr_going_full <= 0; end else begin mshr_pending_size <= mshr_pending_size_n; - mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE); + mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE_BITS'(MSHR_SIZE)); end end @@ -312,13 +314,10 @@ module VX_bank #( assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : drsq_pop_unqual ? drsq_addr_st0 : - creq_pop_unqual ? creq_addr_st0 : - 0; + creq_addr_st0; if (`WORD_SELECT_BITS != 0) begin - assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : - mshr_pop_unqual ? mshr_wsel_st0 : - 0; + assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_wsel_st0; end else begin `UNUSED_VAR (creq_wsel_st0) `UNUSED_VAR (mshr_wsel_st0) @@ -327,25 +326,15 @@ module VX_bank #( assign writedata_st0 = drsq_filldata_st0; - assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : - creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : - 0; + assign tag_st0 = mshr_pop_unqual ? 
`REQ_TAG_WIDTH'(mshr_tag_st0) : `REQ_TAG_WIDTH'(creq_tag_st0); - assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : - creq_pop_unqual ? creq_rw_st0 : - 0; + assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_rw_st0; - assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : - creq_pop_unqual ? creq_byteen_st0 : - 0; + assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_byteen_st0; - assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : - creq_pop_unqual ? creq_tid_st0 : - 0; + assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_tid_st0; - assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : - creq_pop_unqual ? creq_writeword_st0 : - 0; + assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_writeword_st0; `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -442,13 +431,13 @@ if (DRAM_ENABLE) begin || (is_mshr_st1 && addr_st1 != addr_st2)) && !incoming_fill_st1; - assign do_writeback_st1 = (WRITE_THROUGH && mem_rw_st1) - || (!WRITE_THROUGH && dirty_st1 && is_fill_st1); + assign do_writeback_st1 = (WRITE_THROUGH && !is_fill_st1 && mem_rw_st1) + || (!WRITE_THROUGH && is_fill_st1 && dirty_st1); assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1; assign mshr_push_st1 = (miss_st1 || force_miss_st1) - && !(WRITE_THROUGH && mem_rw_st1); + && !(WRITE_THROUGH && !is_fill_st1 && mem_rw_st1); assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; @@ -607,6 +596,9 @@ end // or the fill request is comming for this block wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2); + // use dram rsp or core req address to look up the mshr + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = drsq_pop_unqual ? 
drsq_addr_st0 : creq_addr_st0; + VX_miss_resrv #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -638,7 +630,7 @@ end // lookup .lookup_ready (update_ready_st0), - .lookup_addr (addr_st0), + .lookup_addr (lookup_addr), .lookup_match (mshr_pending_hazard_unqual_st0), // schedule diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index aea35057..c5356ef5 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -53,7 +53,7 @@ module VX_miss_resrv #( // dequeue input wire dequeue ); - `USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; + `USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table; diff --git a/hw/rtl/libs/VX_pipe_register.v b/hw/rtl/libs/VX_pipe_register.v index 60a52d7c..1e503ebd 100644 --- a/hw/rtl/libs/VX_pipe_register.v +++ b/hw/rtl/libs/VX_pipe_register.v @@ -17,25 +17,47 @@ module VX_pipe_register #( `UNUSED_VAR (reset) `UNUSED_VAR (enable) assign data_out = data_in; - end else if (DEPTH == 1) begin - reg [DATAW-1:0] value; - if (RESETW != 0) begin - always @(posedge clk) begin - if (reset) begin - value[DATAW-1:DATAW-RESETW] <= RESETW'(0); - end else if (enable) begin - value <= data_in; - end - end - end else begin + end else if (DEPTH == 1) begin + if (RESETW == 0) begin `UNUSED_VAR (reset) + reg [DATAW-1:0] value; + always @(posedge clk) begin if (enable) begin value <= data_in; end end + assign data_out = value; + end else if (RESETW == DATAW) begin + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (reset) begin + value <= RESETW'(0); + end else if (enable) begin + value <= data_in; + end + end + assign data_out = value; + end else begin + reg [DATAW-RESETW-1:0] value_d; + reg [RESETW-1:0] value_r; + + always @(posedge clk) begin + if (reset) begin + value_r <= RESETW'(0); + end else if (enable) begin + value_r <= data_in[DATAW-1:DATAW-RESETW]; + end + end + + always @(posedge clk) begin + if 
(enable) begin + value_d <= data_in[DATAW-RESETW-1:0]; + end + end + assign data_out = {value_r, value_d}; end - assign data_out = value; end else begin VX_shift_register #( .DATAW (DATAW),