diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 15568c7b..b5166c98 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -248,7 +248,7 @@ // Size of LSU Request Queue `ifndef LSUQ_SIZE -`define LSUQ_SIZE 8 +`define LSUQ_SIZE (`NUM_WARPS * `NUM_THREADS) `endif // Size of FPU Request Queue diff --git a/hw/rtl/VX_gpr_ram_f.v b/hw/rtl/VX_gpr_ram_f.v index 3b800993..68c2a69f 100644 --- a/hw/rtl/VX_gpr_ram_f.v +++ b/hw/rtl/VX_gpr_ram_f.v @@ -20,6 +20,8 @@ module VX_gpr_ram_f #( ); reg [DATAW-1:0] mem [DEPTH-1:0]; + initial mem = '{default: 0}; + always @(posedge clk) begin if (wren) begin mem [waddr] <= wdata; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 39e81b2a..ce4783e1 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -16,36 +16,14 @@ module VX_gpr_stage #( `UNUSED_VAR (reset) `ifdef EXT_F_ENABLE - localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2); - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f; + localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3; - - wire waddr_is_fp = writeback_if.rd[`NR_BITS-1]; - wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1]; - wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1]; - wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1]; - `UNUSED_VAR (raddr3_is_fp) - assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; - assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - VX_gpr_ram_i #( - .DATAW (32), - .DEPTH (RAM_DEPTH) - ) gpr_ram_i ( - .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp), - .waddr (waddr), - .wdata (writeback_if.data[i]), - .raddr1 (raddr1), - .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) - ); - end + assign waddr = {writeback_if.wid, writeback_if.rd}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; for (genvar i = 0; i < `NUM_THREADS; i++) begin VX_gpr_ram_f #( @@ -53,29 +31,29 @@ module VX_gpr_stage #( .DEPTH (RAM_DEPTH) ) gpr_ram_f ( .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp), + .wren (writeback_if.valid && writeback_if.tmask[i]), .waddr (waddr), .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), .raddr3 (raddr3), - .rdata1 (rdata1_f[i]), - .rdata2 (rdata2_f[i]), - .rdata3 (rdata3_f[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]), + .rdata3 (rdata3[i]) ); end - assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i; - assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i; - assign gpr_rsp_if.rs3_data = rdata3_f; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; + assign gpr_rsp_if.rs3_data = rdata3; `else localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2; assign waddr = {writeback_if.wid, writeback_if.rd}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; `UNUSED_VAR (gpr_req_if.rs3) for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -89,13 +67,13 @@ module VX_gpr_stage #( .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]) ); end - assign gpr_rsp_if.rs1_data = rdata1_i; - assign gpr_rsp_if.rs2_data = rdata2_i; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; assign gpr_rsp_if.rs3_data = 0; `endif diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 4660ef3d..ad6115d2 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -85,6 +85,9 @@ module VX_bank #( input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, output wire dram_rsp_ready ); + + localparam MSHR_SIZE_BITS = $clog2(MSHR_SIZE+1); + `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; @@ -172,8 +175,8 @@ module VX_bank #( ); wire mshr_pop; - reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; - wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n; + reg [MSHR_SIZE_BITS-1:0] mshr_pending_size; + wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n; reg mshr_going_full; wire mshr_valid_st0; @@ -264,7 +267,7 @@ module VX_bank #( wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); - wire creq_commit = valid_st2 + wire creq_commit = valid_st2 && !is_fill_st2 && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) && !pipeline_stall; @@ -287,7 +290,7 @@ module VX_bank #( mshr_going_full <= 0; end else begin mshr_pending_size <= mshr_pending_size_n; - mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE); + mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE_BITS'(MSHR_SIZE)); end end @@ -298,13 +301,10 @@ module VX_bank #( assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : drsq_pop_unqual ? drsq_addr_st0 : - creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : - 0; + creq_addr_st0[`LINE_SELECT_ADDR_RNG]; if (`WORD_SELECT_WIDTH != 0) begin - assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : - mshr_pop_unqual ? mshr_wsel_st0 : - 0; + assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : mshr_wsel_st0; end else begin `UNUSED_VAR (mshr_wsel_st0) assign wsel_st0 = 0; @@ -312,25 +312,15 @@ module VX_bank #( assign writedata_st0 = drsq_filldata_st0; - assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : - creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : - 0; + assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : `REQ_TAG_WIDTH'(creq_tag_st0); - assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : - creq_pop_unqual ? creq_rw_st0 : - 0; + assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_rw_st0; - assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : - creq_pop_unqual ? creq_byteen_st0 : - 0; + assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_byteen_st0; - assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : - creq_pop_unqual ? creq_tid_st0 : - 0; + assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_tid_st0; - assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : - creq_pop_unqual ? creq_writeword_st0 : - 0; + assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_writeword_st0; `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -426,13 +416,13 @@ if (DRAM_ENABLE) begin || (is_mshr_st1 && addr_st1 != addr_st2)) && !incoming_fill_st1; - assign do_writeback_st1 = (WRITE_THROUGH && mem_rw_st1) - || (!WRITE_THROUGH && dirty_st1 && is_fill_st1); + assign do_writeback_st1 = (WRITE_THROUGH && !is_fill_st1 && mem_rw_st1) + || (!WRITE_THROUGH && is_fill_st1 && dirty_st1); assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1; assign mshr_push_st1 = (miss_st1 || force_miss_st1) - && !(WRITE_THROUGH && mem_rw_st1); + && !(WRITE_THROUGH && !is_fill_st1 && mem_rw_st1); assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; @@ -591,6 +581,9 @@ end // or the fill request is comming for this block wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2); + // use dram rsp or core req address to lookup the mshr + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = drsq_pop_unqual ? drsq_addr_st0 : creq_addr_st0[`LINE_SELECT_ADDR_RNG]; + VX_miss_resrv #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -622,7 +615,7 @@ end // lookup .lookup_ready (update_ready_st0), - .lookup_addr (addr_st0), + .lookup_addr (lookup_addr), .lookup_match (mshr_pending_hazard_unqual_st0), // schedule diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 1c07d14d..c35444ab 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -53,7 +53,7 @@ module VX_miss_resrv #( // dequeue input wire dequeue ); - `USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; + `USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table;