diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 8eade186..ee9da2e9 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -19,12 +19,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO -#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 +#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -#DEBUG=1 +DEBUG=1 #SCOPE=1 CFLAGS += -fPIC diff --git a/hw/opae/Makefile b/hw/opae/Makefile index 6c57a567..00814dc7 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -19,7 +19,7 @@ ase-2c: gen_sources setup-ase-2c ase-4c: gen_sources setup-ase-4c make -C $(ASE_BUILD_DIR)_4c - cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_3c/work + cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_4c/work setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile diff --git a/hw/opae/sources_4c.txt b/hw/opae/sources_4c.txt index 4772d476..ede9a77f 100644 --- a/hw/opae/sources_4c.txt +++ b/hw/opae/sources_4c.txt @@ -1,5 +1,5 @@ +define+NUM_CORES=4 -+define+L2_ENABLE=0 ++define+L2_ENABLE=1 +define+SYNTHESIS +define+QUARTUS +define+FPU_FAST diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 71762790..7772f032 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -8,7 +8,7 @@ `endif `ifndef NUM_CORES -`define NUM_CORES 4 +`define NUM_CORES 2 `endif `ifndef NUM_WARPS @@ -250,11 +250,6 @@ `define IBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE `endif -// Number of banks {1, 2, 4, 8,...} -`ifndef INUM_BANKS -`define INUM_BANKS 1 -`endif - // Size of a word in bytes `ifndef IWORD_SIZE `define IWORD_SIZE 4 diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index 99028363..9dace0a6 100644 --- a/hw/rtl/VX_csr_io_arb.v 
+++ b/hw/rtl/VX_csr_io_arb.v @@ -33,52 +33,66 @@ module VX_csr_io_arb #( output wire [31:0] csr_io_rsp_data_out, input wire csr_io_rsp_ready_out ); - if (NUM_REQUESTS == 1) begin + if (NUM_REQUESTS > 1) begin + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i)); + assign csr_io_req_addr_out[i] = csr_io_req_addr_in; + assign csr_io_req_rw_out[i] = csr_io_req_rw_in; + assign csr_io_req_data_out[i] = csr_io_req_data_in; + end + + assign csr_io_req_ready_in = csr_io_req_ready_out[request_id]; + + /////////////////////////////////////////////////////////////////////// + + wire [REQS_BITS-1:0] rsp_idx; + wire [NUM_REQUESTS-1:0] rsp_1hot; + + VX_fixed_arbiter #( + .N(NUM_REQUESTS) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .requests (csr_io_rsp_valid_in), + `UNUSED_PIN (grant_valid), + .grant_index (rsp_idx), + .grant_onehot (rsp_1hot) + ); + + wire stall = csr_io_rsp_valid_out && ~csr_io_rsp_ready_out; + + VX_generic_register #( + .N(1 + 32), + .PASSTHRU(NUM_REQUESTS <= 2) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({csr_io_rsp_valid_in[rsp_idx], csr_io_rsp_data_in[rsp_idx]}), + .out ({csr_io_rsp_valid_out, csr_io_rsp_data_out}) + ); + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign csr_io_rsp_ready_in[i] = rsp_1hot[i] && ~stall; + end + + end else begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (request_id) - assign csr_io_req_valid_out = csr_io_req_valid_in; - assign csr_io_req_rw_out = csr_io_req_rw_in; - assign csr_io_req_addr_out = csr_io_req_addr_in; - assign csr_io_req_data_out = csr_io_req_data_in; - assign csr_io_req_ready_in = csr_io_req_ready_out; + assign csr_io_req_valid_out = csr_io_req_valid_in; + assign csr_io_req_addr_out = csr_io_req_addr_in; + assign csr_io_req_rw_out = csr_io_req_rw_in; + assign csr_io_req_data_out = csr_io_req_data_in; + assign csr_io_req_ready_in = csr_io_req_ready_out; - assign 
csr_io_rsp_valid_out = csr_io_rsp_valid_in; - assign csr_io_rsp_data_out = csr_io_rsp_data_in; - assign csr_io_rsp_ready_in = csr_io_rsp_ready_out; - - end else begin - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i)); - assign csr_io_req_rw_out[i] = csr_io_req_rw_in; - assign csr_io_req_addr_out[i] = csr_io_req_addr_in; - assign csr_io_req_data_out[i] = csr_io_req_data_in; - end - - assign csr_io_req_ready_in = csr_io_req_ready_out[request_id]; - - reg [REQS_BITS-1:0] bus_rsp_sel; - - VX_fixed_arbiter #( - .N(NUM_REQUESTS) - ) arbiter ( - .clk (clk), - .reset (reset), - .requests (csr_io_rsp_valid_in), - .grant_index (bus_rsp_sel), - `UNUSED_PIN (grant_valid), - `UNUSED_PIN (grant_onehot) - ); - - assign csr_io_rsp_valid_out = csr_io_rsp_valid_in [bus_rsp_sel]; - assign csr_io_rsp_data_out = csr_io_rsp_data_in [bus_rsp_sel]; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign csr_io_rsp_ready_in[i] = csr_io_rsp_ready_out && (bus_rsp_sel == `REQS_BITS'(i)); - end + assign csr_io_rsp_valid_out = csr_io_rsp_valid_in; + assign csr_io_rsp_data_out = csr_io_rsp_data_in; + assign csr_io_rsp_ready_in = csr_io_rsp_ready_out; end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 9c8b19dd..a77a4407 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -274,6 +274,9 @@ // Cache ID `define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1 +// Number of banks +`define INUM_BANKS 1 + // Core request address bits `define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE)) diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 87a6606d..0c11b9eb 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -8,10 +8,10 @@ module VX_gpu_unit #( input wire clk, input wire reset, - // Input + // Inputs VX_gpu_req_if gpu_req_if, - // Output + // Outputs VX_warp_ctl_if warp_ctl_if, VX_exu_to_cmt_if gpu_commit_if ); diff --git 
a/hw/rtl/VX_io_arb.v b/hw/rtl/VX_io_arb.v index eeb78488..32d06587 100644 --- a/hw/rtl/VX_io_arb.v +++ b/hw/rtl/VX_io_arb.v @@ -14,94 +14,103 @@ module VX_io_arb #( input wire reset, // input requests - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in, input wire [NUM_REQUESTS-1:0] io_req_rw_in, input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in, output wire [NUM_REQUESTS-1:0] io_req_ready_in, // input response output wire [NUM_REQUESTS-1:0] io_rsp_valid_in, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in, output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in, + output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in, input wire [NUM_REQUESTS-1:0] io_rsp_ready_in, // output request - output wire [`NUM_THREADS-1:0] io_req_valid_out, + output wire [`NUM_THREADS-1:0] io_req_valid_out, + output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out, + output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out, output wire io_req_rw_out, - output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out, - output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out, - output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out, - output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out, + output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out, + output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out, input wire io_req_ready_out, // output response input wire io_rsp_valid_out, - 
input wire [WORD_WIDTH-1:0] io_rsp_data_out, input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out, + input wire [WORD_WIDTH-1:0] io_rsp_data_out, output wire io_rsp_ready_out ); - if (NUM_REQUESTS == 1) begin + if (NUM_REQUESTS > 1) begin + + wire [NUM_REQUESTS-1:0] valids; + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign valids[i] = (| io_req_valid_in[i]); + end + + wire [REQS_BITS-1:0] req_idx; + wire [NUM_REQUESTS-1:0] req_1hot; + + VX_rr_arbiter #( + .N(NUM_REQUESTS) + ) req_arb ( + .clk (clk), + .reset (reset), + .requests (valids), + `UNUSED_PIN (grant_valid), + .grant_index (req_idx), + .grant_onehot (req_1hot) + ); + + wire stall = (| io_req_valid_out) && ~io_req_ready_out; + + VX_generic_register #( + .N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)), + .PASSTHRU(NUM_REQUESTS <= 2) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({io_req_valid_in[req_idx], {io_req_tag_in[req_idx], REQS_BITS'(req_idx)}, io_req_addr_in[req_idx], io_req_rw_in[req_idx], io_req_byteen_in[req_idx], io_req_data_in[req_idx]}), + .out ({io_req_valid_out, io_req_tag_out, io_req_addr_out, io_req_rw_out, io_req_byteen_out, io_req_data_out}) + ); + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign io_req_ready_in[i] = req_1hot[i] && ~stall; + end + + /////////////////////////////////////////////////////////////////////// + + wire [REQS_BITS-1:0] rsp_sel = io_rsp_tag_out[REQS_BITS-1:0]; + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign io_rsp_valid_in[i] = io_rsp_valid_out && (rsp_sel == REQS_BITS'(i)); + assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; + assign io_rsp_data_in[i] = io_rsp_data_out; + end + assign io_rsp_ready_out = io_rsp_ready_in[rsp_sel]; + + end else begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign io_req_valid_out = io_req_valid_in; + assign io_req_tag_out = io_req_tag_in; + assign io_req_addr_out = 
io_req_addr_in; assign io_req_rw_out = io_req_rw_in; assign io_req_byteen_out = io_req_byteen_in; - assign io_req_addr_out = io_req_addr_in; assign io_req_data_out = io_req_data_in; - assign io_req_tag_out = io_req_tag_in; assign io_req_ready_in = io_req_ready_out; assign io_rsp_valid_in = io_rsp_valid_out; - assign io_rsp_data_in = io_rsp_data_out; assign io_rsp_tag_in = io_rsp_tag_out; + assign io_rsp_data_in = io_rsp_data_out; assign io_rsp_ready_out = io_rsp_ready_in; - end else begin - - reg [REQS_BITS-1:0] bus_req_sel; - - wire [NUM_REQUESTS-1:0] valid_requests; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign valid_requests[i] = (| io_req_valid_in[i]); - end - - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) arbiter ( - .clk (clk), - .reset (reset), - .requests (valid_requests), - .grant_index (bus_req_sel), - `UNUSED_PIN (grant_valid), - `UNUSED_PIN (grant_onehot) - ); - - assign io_req_valid_out = io_req_valid_in [bus_req_sel]; - assign io_req_rw_out = io_req_rw_in [bus_req_sel]; - assign io_req_byteen_out = io_req_byteen_in [bus_req_sel]; - assign io_req_addr_out = io_req_addr_in [bus_req_sel]; - assign io_req_data_out = io_req_data_in [bus_req_sel]; - assign io_req_tag_out = {io_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)}; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign io_req_ready_in[i] = io_req_ready_out && (bus_req_sel == REQS_BITS'(i)); - end - - wire [REQS_BITS-1:0] bus_rsp_sel = io_rsp_tag_out[REQS_BITS-1:0]; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign io_rsp_valid_in[i] = io_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i)); - assign io_rsp_data_in[i] = io_rsp_data_out; - assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; - end - assign io_rsp_ready_out = io_rsp_ready_in[bus_rsp_sel]; - end endmodule \ No newline at end of file diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index 502b3bfc..937231e1 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -14,88 +14,98 @@ 
module VX_mem_arb #( input wire reset, // input requests - input wire [NUM_REQUESTS-1:0] mem_req_valid_in, + input wire [NUM_REQUESTS-1:0] mem_req_valid_in, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in, + input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in, input wire [NUM_REQUESTS-1:0] mem_req_rw_in, input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in, - input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in, - input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in, + input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in, output wire [NUM_REQUESTS-1:0] mem_req_ready_in, // input response output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in, output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in, + output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in, input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in, // output request output wire mem_req_valid_out, + output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out, + output wire [ADDR_WIDTH-1:0] mem_req_addr_out, output wire mem_req_rw_out, output wire [WORD_SIZE-1:0] mem_req_byteen_out, - output wire [ADDR_WIDTH-1:0] mem_req_addr_out, output wire [WORD_WIDTH-1:0] mem_req_data_out, - output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out, input wire mem_req_ready_out, // output response input wire mem_rsp_valid_out, - input wire [WORD_WIDTH-1:0] mem_rsp_data_out, input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out, + input wire [WORD_WIDTH-1:0] mem_rsp_data_out, output wire mem_rsp_ready_out ); - if (NUM_REQUESTS == 1) begin + if (NUM_REQUESTS > 1) begin + + wire [REQS_BITS-1:0] req_idx; + wire [NUM_REQUESTS-1:0] req_1hot; + + VX_rr_arbiter #( + .N(NUM_REQUESTS) + ) req_arb ( + .clk (clk), + .reset (reset), + .requests (mem_req_valid_in), + `UNUSED_PIN (grant_valid), + .grant_index (req_idx), + .grant_onehot (req_1hot) + ); + + wire stall = 
mem_req_valid_out && ~mem_req_ready_out; + + VX_generic_register #( + .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH), + .PASSTHRU(NUM_REQUESTS <= 2) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}), + .out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out}) + ); + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign mem_req_ready_in[i] = req_1hot[i] && ~stall; + end + + /////////////////////////////////////////////////////////////////////// + + wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0]; + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i)); + assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; + assign mem_rsp_data_in[i] = mem_rsp_data_out; + end + assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel]; + + end else begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign mem_req_valid_out = mem_req_valid_in; + assign mem_req_tag_out = mem_req_tag_in; + assign mem_req_addr_out = mem_req_addr_in; assign mem_req_rw_out = mem_req_rw_in; assign mem_req_byteen_out = mem_req_byteen_in; - assign mem_req_addr_out = mem_req_addr_in; assign mem_req_data_out = mem_req_data_in; - assign mem_req_tag_out = mem_req_tag_in; assign mem_req_ready_in = mem_req_ready_out; assign mem_rsp_valid_in = mem_rsp_valid_out; - assign mem_rsp_data_in = mem_rsp_data_out; assign mem_rsp_tag_in = mem_rsp_tag_out; + assign mem_rsp_data_in = mem_rsp_data_out; assign mem_rsp_ready_out = mem_rsp_ready_in; - end else begin - - reg [REQS_BITS-1:0] bus_req_sel; - - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) arbiter ( - .clk (clk), - .reset (reset), - .requests (mem_req_valid_in), - .grant_index 
(bus_req_sel), - `UNUSED_PIN (grant_valid), - `UNUSED_PIN (grant_onehot) - ); - - assign mem_req_valid_out = mem_req_valid_in [bus_req_sel]; - assign mem_req_rw_out = mem_req_rw_in [bus_req_sel]; - assign mem_req_byteen_out = mem_req_byteen_in [bus_req_sel]; - assign mem_req_addr_out = mem_req_addr_in [bus_req_sel]; - assign mem_req_data_out = mem_req_data_in [bus_req_sel]; - assign mem_req_tag_out = {mem_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)}; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign mem_req_ready_in[i] = mem_req_ready_out && (bus_req_sel == REQS_BITS'(i)); - end - - wire [REQS_BITS-1:0] bus_rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0]; - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i)); - assign mem_rsp_data_in[i] = mem_rsp_data_out; - assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; - end - assign mem_rsp_ready_out = mem_rsp_ready_in[bus_rsp_sel]; - end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 64fa9915..ba2f7c9d 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -216,27 +216,30 @@ module VX_bank #( .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) core_req_arb ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), + // Enqueue - .reqq_push (core_req_fire), - .bank_valids (core_req_valid), - .bank_rw (core_req_rw), - .bank_byteen (core_req_byteen), - .bank_addr (core_req_addr), - .bank_writedata (core_req_data), - .bank_tag (core_req_tag), + .push (core_req_fire), + .tag_in (core_req_tag), + .valids_in (core_req_valid), + .rw_in (core_req_rw), + .byteen_in (core_req_byteen), + .addr_in (core_req_addr), + .writedata_in (core_req_data), // Dequeue - .reqq_pop (reqq_pop), - .reqq_tid_st0 (reqq_tid_st0), - .reqq_rw_st0 (reqq_rw_st0), - .reqq_byteen_st0 (reqq_byteen_st0), - .reqq_addr_st0 (reqq_addr_st0), - 
.reqq_writedata_st0(reqq_writeword_st0), - .reqq_tag_st0 (reqq_tag_st0), - .reqq_empty (reqq_empty), - .reqq_full (reqq_full) + .pop (reqq_pop), + .tag_out (reqq_tag_st0), + .tid_out (reqq_tid_st0), + .rw_out (reqq_rw_st0), + .byteen_out (reqq_byteen_st0), + .addr_out (reqq_addr_st0), + .writedata_out (reqq_writeword_st0), + + // States + .empty (reqq_empty), + .full (reqq_full) ); wire msrq_pop; @@ -252,7 +255,6 @@ module VX_bank #( wire [WORD_SIZE-1:0] msrq_byteen_st0; wire msrq_is_snp_st0; wire msrq_snp_invalidate_st0; - wire msrq_pending_hazard_st1; wire is_msrq_miss_st2; wire is_msrq_miss_st3; @@ -299,7 +301,9 @@ module VX_bank #( wire snp_invalidate_st1; wire is_msrq_st1; wire msrq_pending_hazard_st1; - wire[`LINE_ADDR_WIDTH-1:0] addr_st2; + wire miss_st3; + wire force_miss_st3; + wire [`LINE_ADDR_WIDTH-1:0] addr_st3; assign is_msrq_st0 = msrq_pop_unqual; @@ -373,11 +377,11 @@ module VX_bank #( wire writeen_st2; wire miss_st1; wire miss_st2; - wire miss_st3; wire dirty_st1; wire mem_rw_st1; wire [WORD_SIZE-1:0] mem_byteen_st1; wire force_miss_st2; + wire[`LINE_ADDR_WIDTH-1:0] addr_st2; `DEBUG_BEGIN wire [`REQ_TAG_WIDTH-1:0] tag_st1; wire [`REQS_BITS-1:0] tid_st1; @@ -410,28 +414,28 @@ module VX_bank #( .reset (reset), `ifdef DBG_CORE_REQ_INFO - .debug_pc_st1 (debug_pc_st1), - .debug_rd_st1 (debug_rd_st1), - .debug_wid_st1 (debug_wid_st1), - .debug_tagid_st1(debug_tagid_st1), + .debug_pc (debug_pc_st1), + .debug_rd (debug_rd_st1), + .debug_wid (debug_wid_st1), + .debug_tagid (debug_tagid_st1), `endif .stall (pipeline_stall), - // Actual Read/Write - .valid_req_st1 (valid_st1), - .writefill_st1 (is_fill_st1), - .addr_st1 (addr_st1), - .mem_rw_st1 (mem_rw_st1), - .is_snp_st1 (is_snp_st1), - .snp_invalidate_st1(snp_invalidate_st1), - .force_miss_st1 (force_miss_st1), + // Inputs + .valid_in (valid_st1), + .addr_in (addr_st1), + .is_write_in (mem_rw_st1), + .is_fill_in (is_fill_st1), + .is_snp_in (is_snp_st1), + .snp_invalidate_in(snp_invalidate_st1), + 
.force_miss_in (force_miss_st1), - // Read Data - .readtag_st1 (readtag_st1), - .miss_st1 (miss_st1), - .dirty_st1 (dirty_st1), - .writeen_st1 (writeen_st1) + // Outputs + .readtag_out (readtag_st1), + .miss_out (miss_st1), + .dirty_out (dirty_st1), + .writeen_out (writeen_st1) ); wire valid_st2; @@ -440,8 +444,7 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] readword_st2; wire [`BANK_LINE_WIDTH-1:0] readdata_st2; wire [`BANK_LINE_WIDTH-1:0] writedata_st2; - wire [WORD_SIZE-1:0] mem_byteen_st2; - wire miss_st2; + wire [WORD_SIZE-1:0] mem_byteen_st2; wire dirty_st2; wire [BANK_LINE_SIZE-1:0] dirtyb_st2; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; @@ -449,7 +452,6 @@ module VX_bank #( wire is_fill_st2; wire is_snp_st2; wire snp_invalidate_st2; - wire force_miss_st2; wire is_msrq_st2; VX_generic_register #( @@ -486,37 +488,35 @@ module VX_bank #( .reset (reset), `ifdef DBG_CORE_REQ_INFO - .debug_pc_st2 (debug_pc_st2), - .debug_rd_st2 (debug_rd_st2), - .debug_wid_st2 (debug_wid_st2), - .debug_tagid_st2(debug_tagid_st2), + .debug_pc (debug_pc_st2), + .debug_rd (debug_rd_st2), + .debug_wid (debug_wid_st2), + .debug_tagid (debug_tagid_st2), `endif .stall (pipeline_stall), - // Actual Read/Write - .valid_req_st2 (valid_st2), - .writeen_st2 (writeen_st2), - .writefill_st2 (is_fill_st2), - .addr_st2 (addr_st2), - .wordsel_st2 (wsel_st2), - .mem_byteen_st2 (mem_byteen_st2), - .writeword_st2 (writeword_st2), - .writedata_st2 (writedata_st2), + // Inputs + .valid_in (valid_st2), + .addr_in (addr_st2), + .writeen_in (writeen_st2), + .is_fill_in (is_fill_st2), + .wordsel_in (wsel_st2), + .byteen_in (mem_byteen_st2), + .writeword_in (writeword_st2), + .writedata_in (writedata_st2), - // Read Data - .readword_st2 (readword_st2), - .readdata_st2 (readdata_st2), - .dirtyb_st2 (dirtyb_st2) + // Outputs + .readword_out (readword_st2), + .readdata_out (readdata_st2), + .dirtyb_out (dirtyb_st2) ); - wire valid_st3; - wire [`LINE_ADDR_WIDTH-1:0] addr_st3; + wire valid_st3; wire 
[`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st3; wire [`WORD_WIDTH-1:0] writeword_st3; wire [`WORD_WIDTH-1:0] readword_st3; wire [`BANK_LINE_WIDTH-1:0] readdata_st3; - wire miss_st3; wire dirty_st3; wire [BANK_LINE_SIZE-1:0] dirtyb_st3; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st3; @@ -524,7 +524,6 @@ module VX_bank #( wire is_fill_st3; wire is_snp_st3; wire snp_invalidate_st3; - wire force_miss_st3; wire is_msrq_st3; VX_generic_register #( diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index 36fba049..f806a04e 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -16,26 +16,26 @@ module VX_bank_core_req_arb #( input wire reset, // Enqueue Data - input wire reqq_push, - input wire [NUM_REQUESTS-1:0] bank_valids, - input wire [`CORE_REQ_TAG_COUNT-1:0] bank_rw, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] bank_byteen, - input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] bank_addr, - input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] bank_tag, + input wire push, + input wire [NUM_REQUESTS-1:0] valids_in, + input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in, + input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen_in, + input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] writedata_in, + input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in, + input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in, // Dequeue Data - input wire reqq_pop, - output wire [`REQS_BITS-1:0] reqq_tid_st0, - output wire reqq_rw_st0, - output wire [WORD_SIZE-1:0] reqq_byteen_st0, - output wire [`WORD_ADDR_WIDTH-1:0] reqq_addr_st0, - output wire [`WORD_WIDTH-1:0] reqq_writedata_st0, - output wire [CORE_TAG_WIDTH-1:0] reqq_tag_st0, + input wire pop, + output wire [`REQS_BITS-1:0] tid_out, + output wire rw_out, + output wire [WORD_SIZE-1:0] byteen_out, + output wire [`WORD_ADDR_WIDTH-1:0] addr_out, + output wire [`WORD_WIDTH-1:0] writedata_out, + output 
wire [CORE_TAG_WIDTH-1:0] tag_out, // State Data - output wire reqq_empty, - output wire reqq_full + output wire empty, + output wire full ); wire [NUM_REQUESTS-1:0] out_per_valids; @@ -64,21 +64,21 @@ module VX_bank_core_req_arb #( wire use_empty = !(| use_per_valids); wire out_empty = !(| out_per_valids) || o_empty; - wire push_qual = reqq_push && !reqq_full; + wire push_qual = push && !full; wire pop_qual = !out_empty && use_empty; VX_generic_queue #( - .DATAW($bits(bank_valids) + $bits(bank_addr) + $bits(bank_writedata) + $bits(bank_tag) + $bits(bank_rw) + $bits(bank_byteen)), + .DATAW($bits(valids_in) + $bits(addr_in) + $bits(writedata_in) + $bits(tag_in) + $bits(rw_in) + $bits(byteen_in)), .SIZE(CREQ_SIZE) ) reqq_queue ( .clk (clk), .reset (reset), .push (push_qual), - .data_in ({bank_valids, bank_rw, bank_byteen, bank_addr, bank_writedata, bank_tag}), + .data_in ({valids_in, rw_in, byteen_in, addr_in, writedata_in, tag_in}), .pop (pop_qual), .data_out ({out_per_valids, out_per_rw, out_per_byteen, out_per_addr, out_per_writedata, out_per_tag}), .empty (o_empty), - .full (reqq_full), + .full (full), `UNUSED_PIN (size) ); @@ -91,43 +91,33 @@ module VX_bank_core_req_arb #( assign qual_rw = use_per_rw; assign qual_byteen = use_per_byteen; - wire[`REQS_BITS-1:0] qual_request_index; - wire qual_has_request; - + wire sel_valid; + wire[`REQS_BITS-1:0] sel_idx; + VX_fixed_arbiter #( .N(NUM_REQUESTS) ) sel_bank ( .clk (clk), .reset (reset), .requests (qual_valids), - .grant_index (qual_request_index), - .grant_valid (qual_has_request), + .grant_valid (sel_valid), + .grant_index (sel_idx), `UNUSED_PIN (grant_onehot) ); - assign reqq_empty = !qual_has_request; - assign reqq_tid_st0 = qual_request_index; - assign reqq_byteen_st0 = qual_byteen[qual_request_index]; - assign reqq_addr_st0 = qual_addr[qual_request_index]; - assign reqq_writedata_st0 = qual_writedata[qual_request_index]; + assign empty = !sel_valid; + assign tid_out = sel_idx; + assign byteen_out = 
qual_byteen[sel_idx]; + assign addr_out = qual_addr[sel_idx]; + assign writedata_out = qual_writedata[sel_idx]; if (CORE_TAG_ID_BITS != 0) begin - assign reqq_tag_st0 = qual_tag; - assign reqq_rw_st0 = qual_rw; + assign tag_out = qual_tag; + assign rw_out = qual_rw; end else begin - assign reqq_tag_st0 = qual_tag[qual_request_index]; - assign reqq_rw_st0 = qual_rw[qual_request_index]; - end - -`DEBUG_BLOCK( - reg [NUM_REQUESTS-1:0] updated_valids; - always @(*) begin - updated_valids = qual_valids; - if (qual_has_request) begin - updated_valids[qual_request_index] = 0; - end + assign tag_out = qual_tag[sel_idx]; + assign rw_out = qual_rw[sel_idx]; end -) always @(posedge clk) begin if (reset) begin @@ -140,8 +130,8 @@ module VX_bank_core_req_arb #( use_per_addr <= out_per_addr; use_per_writedata <= out_per_writedata; use_per_tag <= out_per_tag; - end else if (reqq_pop) begin - use_per_valids[qual_request_index] <= 0; + end else if (pop) begin + use_per_valids[sel_idx] <= 0; end end end diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 39010bc6..608863bf 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -46,7 +46,7 @@ module VX_cache #( parameter CORE_TAG_WIDTH = 4, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 4, + parameter CORE_TAG_ID_BITS = 0, // dram request tag size parameter DRAM_TAG_WIDTH = 28, @@ -407,15 +407,15 @@ module VX_cache #( .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) cache_core_rsp_merge ( .clk (clk), - .reset (reset), - .per_bank_core_rsp_tid (per_bank_core_rsp_tid), + .reset (reset), .per_bank_core_rsp_valid (per_bank_core_rsp_valid), - .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_tag (per_bank_core_rsp_tag), + .per_bank_core_rsp_tid (per_bank_core_rsp_tid), + .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_ready (per_bank_core_rsp_ready), - .core_rsp_valid (core_rsp_valid), - .core_rsp_data (core_rsp_data), + .core_rsp_valid (core_rsp_valid), 
.core_rsp_tag (core_rsp_tag), + .core_rsp_data (core_rsp_data), .core_rsp_ready (core_rsp_ready) ); diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index b2dffbb6..e0124c15 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -18,29 +18,22 @@ module VX_cache_core_req_bank_sel #( output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid, output wire core_req_ready ); - reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r; - - if (NUM_BANKS == 1) begin - always @(*) begin - per_bank_valid_r = 0; - for (integer i = 0; i < NUM_REQUESTS; i++) begin - per_bank_valid_r[0][i] = core_req_valid[i]; - end - end - assign core_req_ready = per_bank_ready; - end else begin + if (NUM_BANKS > 1) begin + reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r; reg [NUM_BANKS-1:0] per_bank_ready_sel; always @(*) begin - per_bank_valid_r = 0; + per_bank_valid_r = 0; per_bank_ready_sel = {NUM_BANKS{1'b1}}; for (integer i = 0; i < NUM_REQUESTS; i++) begin per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0; end end - assign core_req_ready = & (per_bank_ready | per_bank_ready_sel); - end - - assign per_bank_valid = per_bank_valid_r; + assign per_bank_valid = per_bank_valid_r; + assign core_req_ready = & (per_bank_ready | per_bank_ready_sel); + end else begin + assign per_bank_valid = core_req_valid; + assign core_req_ready = per_bank_ready; + end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 240359b2..c6059838 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -16,86 +16,101 @@ module VX_cache_core_rsp_merge #( input wire reset, // Per Bank WB - input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, - input wire [NUM_BANKS-1:0] 
per_bank_core_rsp_valid, - input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, - input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, + input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, + input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, + input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, + input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Writeback output wire [NUM_REQUESTS-1:0] core_rsp_valid, - output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, + output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, input wire core_rsp_ready ); + if (NUM_REQUESTS > 1) begin + wire [`BANK_BITS-1:0] sel_idx; - wire [`BANK_BITS-1:0] main_bank_index; - VX_fair_arbiter #( - .N(NUM_BANKS) - ) sel_bank ( - .clk (clk), - .reset (reset), - .requests (per_bank_core_rsp_valid), - .grant_index (main_bank_index), - `UNUSED_PIN (grant_valid), - `UNUSED_PIN (grant_onehot) - ); + VX_rr_arbiter #( + .N(NUM_BANKS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (per_bank_core_rsp_valid), + `UNUSED_PIN (grant_valid), + .grant_index (sel_idx), + `UNUSED_PIN (grant_onehot) + ); - reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual; - reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; - reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; - reg [NUM_BANKS-1:0] core_rsp_bank_select; - - wire stall = ~core_rsp_ready && (| core_rsp_valid); + reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual; + reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; + reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; + reg [NUM_BANKS-1:0] core_rsp_bank_select; + + if (CORE_TAG_ID_BITS != 0) begin + always @(*) begin + core_rsp_valid_unqual = 0; + core_rsp_bank_select = 0; + core_rsp_data_unqual = 'x; + core_rsp_tag_unqual = 
per_bank_core_rsp_tag[sel_idx]; + for (integer i = 0; i < NUM_BANKS; i++) begin + if (per_bank_core_rsp_valid[i] + && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[sel_idx][CORE_TAG_ID_BITS-1:0])) begin + core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; + core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; + core_rsp_bank_select[i] = 1; + end + end + end + end else begin + always @(*) begin + core_rsp_valid_unqual = 0; + core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = 1; + + core_rsp_bank_select = 0; + core_rsp_bank_select[sel_idx] = 1; - if (CORE_TAG_ID_BITS != 0) begin - always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_data_unqual = 0; - core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index]; - for (integer i = 0; i < NUM_BANKS; i++) begin - if (per_bank_core_rsp_valid[i] - && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin - core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; - core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; - core_rsp_bank_select[i] = 1; - end else begin - core_rsp_bank_select[i] = 0; - end - end - end + core_rsp_data_unqual = 'x; + core_rsp_data_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_data[sel_idx]; + + core_rsp_tag_unqual = 'x; + core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx]; + + for (integer i = 0; i < NUM_BANKS; i++) begin + if (per_bank_core_rsp_valid[i] && !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]) begin + core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; + core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; + core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i]; + core_rsp_bank_select[i] = 1; + end + end + end + end + + wire stall = ~core_rsp_ready && (| core_rsp_valid); + + VX_generic_register #( + .N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + 
(`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), + .PASSTHRU(NUM_BANKS <= 2) + ) core_wb_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), + .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) + ); + + assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}}; end else begin - always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_data_unqual = 0; - core_rsp_tag_unqual = 0; - for (integer i = 0; i < NUM_BANKS; i++) begin - if (per_bank_core_rsp_valid[i] - && !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] - && ((main_bank_index == `BANK_BITS'(i)) - || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin - core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; - core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; - core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i]; - core_rsp_bank_select[i] = 1; - end else begin - core_rsp_bank_select[i] = 0; - end - end - end - end + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + `UNUSED_VAR (per_bank_core_rsp_tid) - VX_generic_register #( - .N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)) - ) core_wb_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), - .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) - ); - - assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}}; + assign core_rsp_valid = per_bank_core_rsp_valid; + assign core_rsp_tag = per_bank_core_rsp_tag; + assign core_rsp_data = per_bank_core_rsp_data; + assign per_bank_core_rsp_ready = core_rsp_ready; + end endmodule diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index 7eac5862..df502fe2 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -13,13 +13,13 @@ 
module VX_cache_dram_req_arb #( // Inputs input wire [NUM_BANKS-1:0] per_bank_dram_req_valid, + input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr, input wire [NUM_BANKS-1:0] per_bank_dram_req_rw, input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen, - input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr, input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data, output wire [NUM_BANKS-1:0] per_bank_dram_req_ready, - // Output + // Outputs output wire dram_req_valid, output wire dram_req_rw, output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, @@ -28,36 +28,49 @@ module VX_cache_dram_req_arb #( input wire dram_req_ready ); - wire sel_valid; - wire [`BANK_BITS-1:0] sel_idx; - wire [NUM_BANKS-1:0] sel_1hot; - - VX_fixed_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_dram_req_valid), - .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot(sel_1hot) - ); + if (NUM_BANKS > 1) begin + wire sel_valid; + wire [`BANK_BITS-1:0] sel_idx; + wire [NUM_BANKS-1:0] sel_1hot; + + VX_rr_arbiter #( + .N(NUM_BANKS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (per_bank_dram_req_valid), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); - wire stall = ~dram_req_ready && dram_req_valid; + wire stall = ~dram_req_ready && dram_req_valid; - VX_generic_register #( - .N(1 + 1 + BANK_LINE_SIZE + `DRAM_ADDR_WIDTH + `BANK_LINE_WIDTH) - ) core_wb_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_addr[sel_idx], per_bank_dram_req_data[sel_idx]}), - .out ({dram_req_valid, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}) - ); + VX_generic_register #( + .N(1 + `DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .PASSTHRU(NUM_BANKS <= 2) + ) pipe_reg ( + .clk (clk), + .reset 
(reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, per_bank_dram_req_addr[sel_idx], per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_data[sel_idx]}), + .out ({dram_req_valid, dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}) + ); - for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall; + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall; + end + end else begin + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + assign dram_req_valid = per_bank_dram_req_valid; + assign dram_req_rw = per_bank_dram_req_rw; + assign dram_req_byteen = per_bank_dram_req_byteen; + assign dram_req_addr = per_bank_dram_req_addr; + assign dram_req_data = per_bank_dram_req_data; + assign per_bank_dram_req_ready = dram_req_ready; end endmodule diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 2499d5f9..5aa70c4d 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -27,42 +27,43 @@ module VX_data_access #( `ifdef DBG_CORE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc_st2, - input wire[`NR_BITS-1:0] debug_rd_st2, - input wire[`NW_BITS-1:0] debug_wid_st2, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2, + input wire[31:0] debug_pc, + input wire[`NR_BITS-1:0] debug_rd, + input wire[`NW_BITS-1:0] debug_wid, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid, `IGNORE_WARNINGS_END `endif input wire stall, - input wire valid_req_st2, - input wire writeen_st2, + // Inputs + input wire valid_in, `IGNORE_WARNINGS_BEGIN - input wire[`LINE_ADDR_WIDTH-1:0] addr_st2, + input wire[`LINE_ADDR_WIDTH-1:0] addr_in, `IGNORE_WARNINGS_END - input wire writefill_st2, - input wire[`WORD_WIDTH-1:0] writeword_st2, - input wire[`BANK_LINE_WIDTH-1:0] writedata_st2, + input wire writeen_in, + input wire is_fill_in, + input wire[`WORD_WIDTH-1:0] writeword_in, + input 
wire[`BANK_LINE_WIDTH-1:0] writedata_in, + input wire[WORD_SIZE-1:0] byteen_in, + input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_in, - input wire[WORD_SIZE-1:0] mem_byteen_st2, - input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st2, - - output wire[`WORD_WIDTH-1:0] readword_st2, - output wire[`BANK_LINE_WIDTH-1:0] readdata_st2, - output wire[BANK_LINE_SIZE-1:0] dirtyb_st2 + // Outputs + output wire[`WORD_WIDTH-1:0] readword_out, + output wire[`BANK_LINE_WIDTH-1:0] readdata_out, + output wire[BANK_LINE_SIZE-1:0] dirtyb_out ); - wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_st2; - wire[`BANK_LINE_WIDTH-1:0] qual_read_data_st2; + wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_out; + wire[`BANK_LINE_WIDTH-1:0] qual_read_data; - wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st2; - wire[`BANK_LINE_WIDTH-1:0] use_read_data_st2; + wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_out; + wire[`BANK_LINE_WIDTH-1:0] use_read_data; wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_byte_enable; wire[`BANK_LINE_WIDTH-1:0] use_write_data; wire use_write_enable; - wire[`LINE_SELECT_BITS-1:0] addrline_st2 = addr_st2[`LINE_SELECT_BITS-1:0]; + wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; VX_data_store #( .CACHE_SIZE (CACHE_SIZE), @@ -74,28 +75,28 @@ module VX_data_access #( .reset (reset), - .read_addr (addrline_st2), - .read_dirtyb (qual_read_dirtyb_st2), - .read_data (qual_read_data_st2), + .read_addr (addrline), + .read_dirtyb (qual_read_dirtyb_out), + .read_data (qual_read_data), .write_enable(use_write_enable), - .write_fill (writefill_st2), + .write_fill (is_fill_in), .byte_enable (use_byte_enable), - .write_addr (addrline_st2), + .write_addr (addrline), .write_data (use_write_data) ); - assign use_read_dirtyb_st2= qual_read_dirtyb_st2; - assign use_read_data_st2 = qual_read_data_st2; + assign use_read_dirtyb_out= qual_read_dirtyb_out; + assign use_read_data = qual_read_data; if (`WORD_SELECT_WIDTH != 0) begin - wire [`WORD_WIDTH-1:0] readword = use_read_data_st2[wordsel_st2 * 
`WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] readword = use_read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH]; for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_st2[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st2[i]}}; + assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_in[i]}}; end end else begin for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_st2[i * 8 +: 8] = use_read_data_st2[i * 8 +: 8] & {8{mem_byteen_st2[i]}}; + assign readword_out[i * 8 +: 8] = use_read_data[i * 8 +: 8] & {8{byteen_in[i]}}; end end @@ -103,33 +104,33 @@ module VX_data_access #( wire [`BANK_LINE_WIDTH-1:0] data_write; for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - wire word_sel = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st2 == `UP(`WORD_SELECT_WIDTH)'(i))); + wire word_sel = ((`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i))); - assign byte_enable[i] = writefill_st2 ? {WORD_SIZE{1'b1}} : - word_sel ? mem_byteen_st2 : + assign byte_enable[i] = is_fill_in ? {WORD_SIZE{1'b1}} : + word_sel ? byteen_in : {WORD_SIZE{1'b0}}; - assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = writefill_st2 ? writedata_st2[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st2; + assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? 
writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in; end - assign use_write_enable = valid_req_st2 && writeen_st2 && !stall; + assign use_write_enable = valid_in && writeen_in && !stall; assign use_byte_enable = byte_enable; assign use_write_data = data_write; - assign dirtyb_st2 = use_read_dirtyb_st2; - assign readdata_st2 = use_read_data_st2; + assign dirtyb_out = use_read_dirtyb_out; + assign readdata_out = use_read_data; `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin - if (valid_req_st2 && !stall) begin + if (valid_in && !stall) begin if (use_write_enable) begin - if (writefill_st2) begin - $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), dirtyb_st2, addrline_st2, use_write_data); + if (is_fill_in) begin + $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dirtyb_st2, addrline_st2, wordsel_st2, writeword_st2); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, writeword_in); end end else begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dirtyb_st2, addrline_st2, wordsel_st2, qual_read_data_st2); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, 
BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data); end end end diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 473fd747..26367354 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -93,24 +93,46 @@ module VX_snp_forwarder #( assign snp_req_ready = !sfq_full && fwdout_ready; - reg [`REQS_BITS-1:0] fwdin_sel; + if (NUM_REQUESTS > 1) begin + wire sel_valid; + wire [`REQS_BITS-1:0] sel_idx; + wire [NUM_REQUESTS-1:0] sel_1hot; - VX_fixed_arbiter #( - .N(NUM_REQUESTS) - ) arbiter ( - .clk (clk), - .reset (reset), - .requests (snp_fwdin_valid), - .grant_index (fwdin_sel), - `UNUSED_PIN (grant_valid), - `UNUSED_PIN (grant_onehot) - ); + VX_fixed_arbiter #( + .N(NUM_REQUESTS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (snp_fwdin_valid), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot (sel_1hot) + ); - assign fwdin_valid = snp_fwdin_valid[fwdin_sel]; - assign fwdin_tag = snp_fwdin_tag[fwdin_sel]; + /* fwdin_valid and fwdin_tag are driven only by the pipe_reg output below; */ + /* a continuous assign here as well would put two drivers on each net. */ - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i)); + wire stall = fwdin_valid && ~fwdin_ready; + + VX_generic_register #( + .N(1 + `LOG2UP(SNRQ_SIZE)), + .PASSTHRU(NUM_REQUESTS <= 2) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, snp_fwdin_tag[sel_idx]}), + .out ({fwdin_valid, fwdin_tag}) + ); + + for (genvar i = 0; i < NUM_REQUESTS; i++) begin + assign snp_fwdin_ready[i] = sel_1hot[i] && !stall; + end + end else begin + assign fwdin_valid = snp_fwdin_valid; + assign fwdin_tag = snp_fwdin_tag; + assign snp_fwdin_ready = fwdin_ready; end `ifdef DBG_PRINT_CACHE_SNP @@ -122,7 +144,7 @@ module VX_snp_forwarder #( $display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, 
`DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]); end if (fwdin_valid && fwdin_ready) begin - $display("%t: cache%0d snp-fwd-in[%0d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag); + $display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag); end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag); diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v index 35449c4a..ebd40db1 100644 --- a/hw/rtl/cache/VX_snp_rsp_arb.v +++ b/hw/rtl/cache/VX_snp_rsp_arb.v @@ -16,37 +16,43 @@ module VX_snp_rsp_arb #( output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready ); + if (NUM_BANKS > 1) begin + wire sel_valid; + wire [`BANK_BITS-1:0] sel_idx; + wire [NUM_BANKS-1:0] sel_1hot; - wire sel_valid; - wire [`BANK_BITS-1:0] sel_idx; - wire [NUM_BANKS-1:0] sel_1hot; + VX_fixed_arbiter #( + .N(NUM_BANKS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (per_bank_snp_rsp_valid), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); - VX_fixed_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_snp_rsp_valid), - .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot(sel_1hot) - ); + wire stall = ~snp_rsp_ready && snp_rsp_valid; - wire stall = ~snp_rsp_ready && snp_rsp_valid; + VX_generic_register #( + .N(1 + SNP_REQ_TAG_WIDTH), + .PASSTHRU(NUM_BANKS <= 2) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}), + .out ({snp_rsp_valid, snp_rsp_tag}) + ); - VX_generic_register #( - .N(1 + SNP_REQ_TAG_WIDTH) - ) core_wb_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}), - .out ({snp_rsp_valid, snp_rsp_tag}) - ); - - for (genvar i = 0; 
i < NUM_BANKS; i++) begin - assign per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall; + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall; + end + end else begin + assign snp_rsp_valid = per_bank_snp_rsp_valid; + assign snp_rsp_tag = per_bank_snp_rsp_tag; + assign per_bank_snp_rsp_ready = snp_rsp_ready; end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 398de7fa..8e04d3a2 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -27,46 +27,44 @@ module VX_tag_access #( `ifdef DBG_CORE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc_st1, - input wire[`NR_BITS-1:0] debug_rd_st1, - input wire[`NW_BITS-1:0] debug_wid_st1, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1, + input wire[31:0] debug_pc, + input wire[`NR_BITS-1:0] debug_rd, + input wire[`NW_BITS-1:0] debug_wid, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid, `IGNORE_WARNINGS_END `endif input wire stall, - input wire is_snp_st1, - input wire snp_invalidate_st1, + // Inputs + input wire valid_in, + input wire[`LINE_ADDR_WIDTH-1:0] addr_in, + input wire is_write_in, + input wire is_fill_in, + input wire is_snp_in, + input wire snp_invalidate_in, + input wire force_miss_in, - input wire[`LINE_ADDR_WIDTH-1:0] addr_st1, - - input wire valid_req_st1, - input wire writefill_st1, - - input wire mem_rw_st1, - - input wire force_miss_st1, - - output wire[`TAG_SELECT_BITS-1:0] readtag_st1, - output wire miss_st1, - output wire dirty_st1, - output wire writeen_st1 + // Outputs + output wire[`TAG_SELECT_BITS-1:0] readtag_out, + output wire miss_out, + output wire dirty_out, + output wire writeen_out ); - wire qual_read_valid_st1; - wire qual_read_dirty_st1; - wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1; + wire qual_read_valid; + wire qual_read_dirty; + wire[`TAG_SELECT_BITS-1:0] qual_read_tag; - wire use_read_valid_st1; - wire use_read_dirty_st1; - 
wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1; + wire use_read_valid; + wire use_read_dirty; + wire[`TAG_SELECT_BITS-1:0] use_read_tag; wire use_write_enable; wire use_invalidate; - wire[`TAG_SELECT_BITS-1:0] addrtag_st1 = addr_st1[`TAG_LINE_ADDR_RNG]; - wire[`LINE_SELECT_BITS-1:0] addrline_st1 = addr_st1[`LINE_SELECT_BITS-1:0]; + wire[`TAG_SELECT_BITS-1:0] addrtag = addr_in[`TAG_LINE_ADDR_RNG]; + wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; VX_tag_store #( .CACHE_SIZE (CACHE_SIZE), @@ -77,69 +75,69 @@ module VX_tag_access #( .clk (clk), .reset (reset), - .read_addr (addrline_st1), - .read_valid (qual_read_valid_st1), - .read_dirty (qual_read_dirty_st1), - .read_tag (qual_read_tag_st1), + .read_addr (addrline), + .read_valid (qual_read_valid), + .read_dirty (qual_read_dirty), + .read_tag (qual_read_tag), .invalidate (use_invalidate), .write_enable(use_write_enable), - .write_fill (writefill_st1), - .write_addr (addrline_st1), - .write_tag (addrtag_st1) + .write_fill (is_fill_in), + .write_addr (addrline), + .write_tag (addrtag) ); - assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid - assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache - assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : addrtag_st1; // Tag is always the same in SM + assign use_read_valid = qual_read_valid || !DRAM_ENABLE; // If shared memory, always valid + assign use_read_dirty = qual_read_dirty && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache + assign use_read_tag = DRAM_ENABLE ? 
qual_read_tag : addrtag; // Tag is always the same in SM // use "case equality" to handle uninitialized tag when block entry is not valid - wire tags_match = use_read_valid_st1 && (addrtag_st1 === use_read_tag_st1); + wire tags_match = use_read_valid && (addrtag === use_read_tag); - wire normal_write = valid_req_st1 - && mem_rw_st1 - && use_read_valid_st1 - && !writefill_st1 - && !is_snp_st1 - && !miss_st1 - && !force_miss_st1; + wire normal_write = valid_in + && is_write_in + && use_read_valid + && !is_fill_in + && !is_snp_in + && !miss_out + && !force_miss_in; - wire fill_write = valid_req_st1 && writefill_st1 + wire fill_write = valid_in && is_fill_in && !tags_match; // discard redundant fills because the block could be dirty assign use_write_enable = (normal_write || fill_write) && !stall; - assign use_invalidate = valid_req_st1 && is_snp_st1 + assign use_invalidate = valid_in && is_snp_in && tags_match - && (use_read_dirty_st1 || snp_invalidate_st1) // block is dirty or should invalidate - && !force_miss_st1 + && (use_read_dirty || snp_invalidate_in) // block is dirty or should invalidate + && !force_miss_in && !stall; - wire core_req_miss = valid_req_st1 && !is_snp_st1 && !writefill_st1 + wire core_req_miss = valid_in && !is_snp_in && !is_fill_in && !tags_match; - assign miss_st1 = core_req_miss; - assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1; - assign readtag_st1 = use_read_tag_st1; - assign writeen_st1 = use_write_enable; + assign miss_out = core_req_miss; + assign dirty_out = valid_in && use_read_valid && use_read_dirty; + assign readtag_out = use_read_tag; + assign writeen_out = use_write_enable; `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin - if (valid_req_st1 && !stall) begin - if (writefill_st1 && use_read_valid_st1 && tags_match) begin - $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + if (valid_in && !stall) begin + if (is_fill_in && use_read_valid && 
tags_match) begin + $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID)); end - if (miss_st1) begin - $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, blk_tag_id=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, use_read_dirty_st1, qual_read_tag_st1, addrline_st1, addrtag_st1); + if (miss_out) begin + $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, blk_tag_id=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, use_read_dirty, qual_read_tag, addrline, addrtag); end else if ((| use_write_enable)) begin - if (writefill_st1) begin - $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), addrline_st1, addrtag_st1); + if (is_fill_in) begin + $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag); end else begin - $display("%t: cache%0d:%0d tag-write: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, addrline_st1, addrtag_st1); + $display("%t: cache%0d:%0d tag-write: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, addrline, addrtag); end end else begin - $display("%t: cache%0d:%0d tag-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, addrline_st1, qual_read_tag_st1); + $display("%t: cache%0d:%0d tag-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, addrline, qual_read_tag); end 
end end