From 5c40422e4f80d3bd40dfeabbc5905c494050f2b3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Jul 2021 10:14:48 -0700 Subject: [PATCH] dcache response bus optimization --- ci/regression.sh | 10 +- hw/rtl/VX_cluster.v | 1 + hw/rtl/VX_core.v | 1 + hw/rtl/VX_lsu_unit.v | 25 +-- hw/rtl/VX_mem_unit.v | 3 + hw/rtl/VX_pipeline.v | 23 +- hw/rtl/VX_smem_arb.v | 23 +- hw/rtl/Vortex.v | 1 + hw/rtl/cache/VX_cache.v | 197 +++++++++--------- hw/rtl/cache/VX_cache_define.vh | 2 +- ..._req_bank_sel.v => VX_core_req_bank_sel.v} | 141 ++++++++----- ...e_core_rsp_merge.v => VX_core_rsp_merge.v} | 55 +++-- hw/rtl/cache/VX_nc_bypass.v | 86 +++++--- hw/rtl/cache/VX_shared_mem.v | 30 ++- hw/rtl/interfaces/VX_dcache_core_rsp_if.v | 7 +- hw/syn/quartus/unittest/Makefile | 4 +- 16 files changed, 350 insertions(+), 259 deletions(-) rename hw/rtl/cache/{VX_cache_core_req_bank_sel.v => VX_core_req_bank_sel.v} (72%) rename hw/rtl/cache/{VX_cache_core_rsp_merge.v => VX_core_rsp_merge.v} (84%) diff --git a/ci/regression.sh b/ci/regression.sh index 6fac2bb5..67724853 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -49,6 +49,14 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood # using FPNEW FPU core FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood +# test cache banking +CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo +CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo + +# test cache multi-porting +CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo +CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo + # test 128-bit MEM block CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo @@ -66,7 +74,7 @@ CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 -- CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm # test long memory latency -CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=sgemm +CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo # test pipeline stress ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n128" \ No newline at end of file diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 3af511db..f9895ca2 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -127,6 +127,7 @@ module VX_cluster #( .core_rsp_data (per_core_mem_rsp_data), .core_rsp_tag (per_core_mem_rsp_tag), .core_rsp_ready (per_core_mem_rsp_ready), + `UNUSED_PIN (core_rsp_tmask), // Memory request .mem_req_valid (mem_req_valid), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index ff1f6fab..380f89d4 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -101,6 +101,7 @@ module VX_core #( // Dcache core reponse .dcache_rsp_valid (dcache_core_rsp_if.valid), + .dcache_rsp_tmask (dcache_core_rsp_if.tmask), .dcache_rsp_data (dcache_core_rsp_if.data), .dcache_rsp_tag (dcache_core_rsp_if.tag), .dcache_rsp_ready (dcache_core_rsp_if.ready), diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 6ea74314..fa5f994c 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -120,7 +120,7 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; - wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; + wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready; wire mbuf_push = (| dcache_req_fire) && is_req_start // first submission only @@ -177,7 +177,7 @@ module VX_lsu_unit #( end end - assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid; + assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.tmask; always @(posedge clk) begin if (mbuf_push) begin @@ -212,11 +212,12 @@ module VX_lsu_unit #( end always @(*) begin + mem_req_data = req_data[i]; case (req_offset[i]) - 1: mem_req_data[31:8] = req_data[i][23:0]; - 2: mem_req_data[31:16] = req_data[i][15:0]; - 3: mem_req_data[31:24] = req_data[i][7:0]; - default: mem_req_data = req_data[i]; + 1: mem_req_data[31:8] = req_data[i][23:0]; + 2: mem_req_data[31:16] = req_data[i][15:0]; + 3: mem_req_data[31:24] = req_data[i][7:0]; + default:; endcase end @@ -269,7 +270,7 @@ module VX_lsu_unit #( end end - assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid; + assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.tmask; // send load commit @@ -282,8 +283,8 @@ module VX_lsu_unit #( .clk (clk), .reset (reset), .enable (!load_rsp_stall), - .data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), - .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) + .data_in ({dcache_rsp_if.valid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), + .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) ); // Can accept new cache response? @@ -298,7 +299,7 @@ module VX_lsu_unit #( `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen); `SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data); `SCOPE_ASSIGN (dcache_req_tag, req_tag); - `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}}); + `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_fire}}); `SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data); `SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr); @@ -339,8 +340,8 @@ module VX_lsu_unit #( end end if (dcache_rsp_fire) begin - $write("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=", - $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd); + $write("%t: D$%0d Rsp: tmask=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=", + $time, CORE_ID, dcache_rsp_if.tmask, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd); `PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS); $write(", is_dup=%b\n", rsp_is_dup); end diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 343891ec..3f7b8616 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -107,6 +107,7 @@ module VX_mem_unit # ( .core_rsp_data (icache_core_rsp_if.data), .core_rsp_tag (icache_core_rsp_if.tag), .core_rsp_ready (icache_core_rsp_if.ready), + `UNUSED_PIN (core_rsp_tmask), `ifdef PERF_ENABLE .perf_cache_if (perf_icache_if), @@ -162,6 +163,7 @@ module VX_mem_unit # ( // Core response .core_rsp_valid (dcache_rsp_if.valid), + .core_rsp_tmask (dcache_rsp_if.tmask), .core_rsp_data (dcache_rsp_if.data), .core_rsp_tag (dcache_rsp_if.tag), .core_rsp_ready (dcache_rsp_if.ready), @@ -241,6 +243,7 @@ module VX_mem_unit # ( // Core response .core_rsp_valid (smem_rsp_if.valid), + .core_rsp_tmask (smem_rsp_if.tmask), .core_rsp_data (smem_rsp_if.data), .core_rsp_tag (smem_rsp_if.tag), .core_rsp_ready (smem_rsp_if.ready) diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index be31cb29..925efa4f 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -19,7 +19,8 @@ module VX_pipeline #( input wire [`NUM_THREADS-1:0] dcache_req_ready, // Dcache core reponse - input wire [`NUM_THREADS-1:0] dcache_rsp_valid, + input wire dcache_rsp_valid, + input wire [`NUM_THREADS-1:0] dcache_rsp_tmask, input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data, input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag, output wire dcache_rsp_ready, @@ -72,6 +73,7 @@ module VX_pipeline #( ) dcache_core_rsp_if(); assign dcache_core_rsp_if.valid = dcache_rsp_valid; + assign dcache_core_rsp_if.tmask = dcache_rsp_tmask; assign dcache_core_rsp_if.data = dcache_rsp_data; assign dcache_core_rsp_if.tag = dcache_rsp_tag; assign dcache_rsp_ready = dcache_core_rsp_if.ready; @@ -130,12 +132,21 @@ module VX_pipeline #( VX_perf_pipeline_if perf_pipeline_if(); `endif + wire fetch_reset, decode_reset, issue_reset, execute_reset, commit_reset; + VX_reset_relay #( + .NUM_NODES (5) + ) reset_relay ( + .clk (clk), + .reset (reset), + .reset_o ({fetch_reset, decode_reset, issue_reset, execute_reset, commit_reset}) + ); + VX_fetch #( .CORE_ID(CORE_ID) ) fetch ( `SCOPE_BIND_VX_pipeline_fetch .clk (clk), - .reset (reset), + .reset (fetch_reset), .icache_req_if (icache_core_req_if), .icache_rsp_if (icache_core_rsp_if), .wstall_if (wstall_if), @@ -150,7 +161,7 @@ module VX_pipeline #( .CORE_ID(CORE_ID) ) decode ( .clk (clk), - .reset (reset), + .reset (decode_reset), .ifetch_rsp_if (ifetch_rsp_if), .decode_if (decode_if), .wstall_if (wstall_if), @@ -163,7 +174,7 @@ module VX_pipeline #( `SCOPE_BIND_VX_pipeline_issue .clk (clk), - .reset (reset), + .reset (issue_reset), `ifdef PERF_ENABLE .perf_pipeline_if (perf_pipeline_if), @@ -185,7 +196,7 @@ module VX_pipeline #( `SCOPE_BIND_VX_pipeline_execute .clk (clk), - .reset (reset), + .reset (execute_reset), `ifdef PERF_ENABLE .perf_memsys_if (perf_memsys_if), @@ -219,7 +230,7 @@ module VX_pipeline #( .CORE_ID(CORE_ID) ) commit ( .clk (clk), - .reset (reset), + .reset (commit_reset), .alu_commit_if (alu_commit_if), .ld_commit_if (ld_commit_if), diff --git a/hw/rtl/VX_smem_arb.v b/hw/rtl/VX_smem_arb.v index 48eb1680..32bbee4f 100644 --- a/hw/rtl/VX_smem_arb.v +++ b/hw/rtl/VX_smem_arb.v @@ -53,32 +53,21 @@ module VX_smem_arb ( // handle responses // - wire [1:0] rsp_valid_in; - wire [1:0][RSP_DATAW-1:0] rsp_data_in; - wire [`NUM_THREADS-1:0] core_rsp_tmask; - wire core_rsp_valid; - - assign rsp_valid_in[0] = (| cache_rsp_if.valid); - assign rsp_valid_in[1] = (| smem_rsp_if.valid); - - assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}; - assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}}; - VX_stream_arbiter #( .NUM_REQS (2), .DATAW (RSP_DATAW), + .TYPE ("X"), .BUFFERED (1) ) rsp_arb ( .clk (clk), .reset (reset), - .valid_in (rsp_valid_in), - .data_in (rsp_data_in), + .valid_in ({smem_rsp_if.valid, cache_rsp_if.valid}), + .data_in ({{smem_rsp_if.tmask, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}}, + {cache_rsp_if.tmask, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}}), .ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}), - .valid_out (core_rsp_valid), - .data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}), + .valid_out (core_rsp_if.valid), + .data_out ({core_rsp_if.tmask, core_rsp_if.data, core_rsp_if.tag}), .ready_out (core_rsp_if.ready) ); - assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_tmask; - endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 34794854..e63986a4 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -125,6 +125,7 @@ module Vortex ( .core_rsp_data (per_cluster_mem_rsp_data), .core_rsp_tag (per_cluster_mem_rsp_tag), .core_rsp_ready (per_cluster_mem_rsp_ready), + `UNUSED_PIN (core_rsp_tmask), // Memory request .mem_req_valid (mem_req_valid), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 5769c404..d102cba7 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -64,10 +64,11 @@ module VX_cache #( output wire [NUM_REQS-1:0] core_req_ready, // Core response - output wire [NUM_REQS-1:0] core_rsp_valid, + output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid, + output wire [NUM_REQS-1:0] core_rsp_tmask, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, - output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, - input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready, + output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, + input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready, // Memory request output wire mem_req_valid, @@ -86,6 +87,7 @@ module VX_cache #( ); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) + `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; @@ -97,39 +99,40 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// // Core request - wire [NUM_REQS-1:0] core_req_valid_out; - wire [NUM_REQS-1:0] core_req_rw_out; - wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_out; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_out; - wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_out; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_out; - wire [NUM_REQS-1:0] core_req_ready_out; + wire [NUM_REQS-1:0] core_req_valid_nc; + wire [NUM_REQS-1:0] core_req_rw_nc; + wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_nc; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_nc; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_nc; + wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_nc; + wire [NUM_REQS-1:0] core_req_ready_nc; // Core response - wire [NUM_REQS-1:0] core_rsp_valid_in; - wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in; - wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready_in; + wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_nc; + wire [NUM_REQS-1:0] core_rsp_tmask_nc; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_nc; + wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_nc; + wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_nc; // Memory request - wire mem_req_valid_in; - wire mem_req_rw_in; - wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_in; - wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_in; - wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_in; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_in; - wire mem_req_ready_in; + wire mem_req_valid_nc; + wire mem_req_rw_nc; + wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc; + wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc; + wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag_nc; + wire mem_req_ready_nc; // Memory response - wire mem_rsp_valid_out; - wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_out; - wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_out; - wire mem_rsp_ready_out; + wire mem_rsp_valid_nc; + wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc; + wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_nc; + wire mem_rsp_ready_nc; if (NC_ENABLE) begin VX_nc_bypass #( .NUM_REQS (NUM_REQS), - .NUM_RSP_TAGS (`CORE_REQ_TAG_COUNT), + .NUM_RSP_TAGS (`CORE_RSP_TAGS), .NC_TAG_BIT (0), .CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH), @@ -153,34 +156,36 @@ module VX_cache #( .core_req_ready_in (core_req_ready), // Core request out - .core_req_valid_out (core_req_valid_out), - .core_req_rw_out (core_req_rw_out), - .core_req_byteen_out(core_req_byteen_out), - .core_req_addr_out (core_req_addr_out), - .core_req_data_out (core_req_data_out), - .core_req_tag_out (core_req_tag_out), - .core_req_ready_out (core_req_ready_out), + .core_req_valid_out (core_req_valid_nc), + .core_req_rw_out (core_req_rw_nc), + .core_req_byteen_out(core_req_byteen_nc), + .core_req_addr_out (core_req_addr_nc), + .core_req_data_out (core_req_data_nc), + .core_req_tag_out (core_req_tag_nc), + .core_req_ready_out (core_req_ready_nc), // Core response in - .core_rsp_valid_in (core_rsp_valid_in), - .core_rsp_data_in (core_rsp_data_in), - .core_rsp_tag_in (core_rsp_tag_in), - .core_rsp_ready_in (core_rsp_ready_in), + .core_rsp_valid_in (core_rsp_valid_nc), + .core_rsp_tmask_in (core_rsp_tmask_nc), + .core_rsp_data_in (core_rsp_data_nc), + .core_rsp_tag_in (core_rsp_tag_nc), + .core_rsp_ready_in (core_rsp_ready_nc), // Core response out .core_rsp_valid_out (core_rsp_valid), + .core_rsp_tmask_out (core_rsp_tmask), .core_rsp_data_out (core_rsp_data), .core_rsp_tag_out (core_rsp_tag), .core_rsp_ready_out (core_rsp_ready), // Memory request in - .mem_req_valid_in (mem_req_valid_in), - .mem_req_rw_in (mem_req_rw_in), - .mem_req_byteen_in (mem_req_byteen_in), - .mem_req_addr_in (mem_req_addr_in), - .mem_req_data_in (mem_req_data_in), - .mem_req_tag_in (mem_req_tag_in), - .mem_req_ready_in (mem_req_ready_in), + .mem_req_valid_in (mem_req_valid_nc), + .mem_req_rw_in (mem_req_rw_nc), + .mem_req_byteen_in (mem_req_byteen_nc), + .mem_req_addr_in (mem_req_addr_nc), + .mem_req_data_in (mem_req_data_nc), + .mem_req_tag_in (mem_req_tag_nc), + .mem_req_ready_in (mem_req_ready_nc), // Memory request out .mem_req_valid_out (mem_req_valid), @@ -198,52 +203,53 @@ module VX_cache #( .mem_rsp_ready_in (mem_rsp_ready), // Memory response out - .mem_rsp_valid_out (mem_rsp_valid_out), - .mem_rsp_data_out (mem_rsp_data_out), - .mem_rsp_tag_out (mem_rsp_tag_out), - .mem_rsp_ready_out (mem_rsp_ready_out) + .mem_rsp_valid_out (mem_rsp_valid_nc), + .mem_rsp_data_out (mem_rsp_data_nc), + .mem_rsp_tag_out (mem_rsp_tag_nc), + .mem_rsp_ready_out (mem_rsp_ready_nc) ); end else begin - assign core_req_valid_out = core_req_valid; - assign core_req_rw_out = core_req_rw; - assign core_req_addr_out = core_req_addr; - assign core_req_byteen_out = core_req_byteen; - assign core_req_data_out = core_req_data; - assign core_req_tag_out = core_req_tag; - assign core_req_ready = core_req_ready_out; + assign core_req_valid_nc = core_req_valid; + assign core_req_rw_nc = core_req_rw; + assign core_req_addr_nc = core_req_addr; + assign core_req_byteen_nc = core_req_byteen; + assign core_req_data_nc = core_req_data; + assign core_req_tag_nc = core_req_tag; + assign core_req_ready = core_req_ready_nc; - assign core_rsp_valid = core_rsp_valid_in; - assign core_rsp_data = core_rsp_data_in; - assign core_rsp_tag = core_rsp_tag_in; - assign core_rsp_ready_in = core_rsp_ready; + assign core_rsp_valid = core_rsp_valid_nc; + assign core_rsp_tmask = core_rsp_tmask_nc; + assign core_rsp_data = core_rsp_data_nc; + assign core_rsp_tag = core_rsp_tag_nc; + assign core_rsp_ready_nc = core_rsp_ready; - assign mem_req_valid = mem_req_valid_in; - assign mem_req_rw = mem_req_rw_in; - assign mem_req_addr = mem_req_addr_in; - assign mem_req_byteen = mem_req_byteen_in; - assign mem_req_data = mem_req_data_in; - assign mem_req_tag = mem_req_tag_in; - assign mem_req_ready_in = mem_req_ready; + assign mem_req_valid = mem_req_valid_nc; + assign mem_req_rw = mem_req_rw_nc; + assign mem_req_addr = mem_req_addr_nc; + assign mem_req_byteen = mem_req_byteen_nc; + assign mem_req_data = mem_req_data_nc; + assign mem_req_tag = mem_req_tag_nc; + assign mem_req_ready_nc = mem_req_ready; - assign mem_rsp_valid_out = mem_rsp_valid; - assign mem_rsp_data_out = mem_rsp_data; - assign mem_rsp_tag_out = mem_rsp_tag; - assign mem_rsp_ready = mem_rsp_ready_out; + assign mem_rsp_valid_nc = mem_rsp_valid; + assign mem_rsp_data_nc = mem_rsp_data; + assign mem_rsp_tag_nc = mem_rsp_tag; + assign mem_rsp_ready = mem_rsp_ready_nc; end /////////////////////////////////////////////////////////////////////////// wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual; - wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_out_a, mem_rsp_tag_qual; + wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc_a, mem_rsp_tag_qual; wire mrsq_full, mrsq_empty; wire mrsq_push, mrsq_pop; - assign mrsq_push = mem_rsp_valid_out && mem_rsp_ready_out; - assign mem_rsp_ready_out = !mrsq_full; + assign mrsq_push = mem_rsp_valid_nc && mem_rsp_ready_nc; + assign mem_rsp_ready_nc = !mrsq_full; // trim out shared memory and non-cacheable flags - assign mem_rsp_tag_out_a = mem_rsp_tag_out[NC_ENABLE +: `MEM_ADDR_WIDTH]; + assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH]; VX_fifo_queue #( .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), @@ -254,7 +260,7 @@ module VX_cache #( .reset (reset), .push (mrsq_push), .pop (mrsq_pop), - .data_in ({mem_rsp_tag_out_a, mem_rsp_data_out}), + .data_in ({mem_rsp_tag_nc_a, mem_rsp_data_nc}), .data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}), .empty (mrsq_empty), .full (mrsq_full), @@ -263,7 +269,7 @@ module VX_cache #( `UNUSED_PIN (size) ); - `UNUSED_VAR (mem_rsp_tag_out) + `UNUSED_VAR (mem_rsp_tag_nc) /////////////////////////////////////////////////////////////////////////// @@ -316,7 +322,7 @@ module VX_cache #( assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)]; end - VX_cache_core_req_bank_sel #( + VX_core_req_bank_sel #( .CACHE_ID (CACHE_ID), .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -331,13 +337,13 @@ module VX_cache #( `ifdef PERF_ENABLE .bank_stalls(perf_cache_if.bank_stalls), `endif - .core_req_valid (core_req_valid_out), - .core_req_rw (core_req_rw_out), - .core_req_addr (core_req_addr_out), - .core_req_byteen(core_req_byteen_out), - .core_req_data (core_req_data_out), - .core_req_tag (core_req_tag_out), - .core_req_ready (core_req_ready_out), + .core_req_valid (core_req_valid_nc), + .core_req_rw (core_req_rw_nc), + .core_req_addr (core_req_addr_nc), + .core_req_byteen (core_req_byteen_nc), + .core_req_data (core_req_data_nc), + .core_req_tag (core_req_tag_nc), + .core_req_ready (core_req_ready_nc), .per_bank_core_req_valid (per_bank_core_req_valid), .per_bank_core_req_rw (per_bank_core_req_rw), .per_bank_core_req_addr (per_bank_core_req_addr), @@ -491,7 +497,7 @@ module VX_cache #( ); end - VX_cache_core_rsp_merge #( + VX_core_rsp_merge #( .CACHE_ID (CACHE_ID), .NUM_BANKS (NUM_BANKS), .NUM_PORTS (NUM_PORTS), @@ -508,10 +514,11 @@ module VX_cache #( .per_bank_core_rsp_tag (per_bank_core_rsp_tag), .per_bank_core_rsp_tid (per_bank_core_rsp_tid), .per_bank_core_rsp_ready (per_bank_core_rsp_ready), - .core_rsp_valid (core_rsp_valid_in), - .core_rsp_tag (core_rsp_tag_in), - .core_rsp_data (core_rsp_data_in), - .core_rsp_ready (core_rsp_ready_in) + .core_rsp_valid (core_rsp_valid_nc), + .core_rsp_tmask (core_rsp_tmask_nc), + .core_rsp_tag (core_rsp_tag_nc), + .core_rsp_data (core_rsp_data_nc), + .core_rsp_ready (core_rsp_ready_nc) ); wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; @@ -529,16 +536,16 @@ module VX_cache #( .valid_in (per_bank_mem_req_valid), .data_in (data_in), .ready_in (per_bank_mem_req_ready), - .valid_out (mem_req_valid_in), - .data_out ({mem_req_addr_in, mem_req_rw_in, mem_req_byteen_in, mem_req_data_in}), - .ready_out (mem_req_ready_in) + .valid_out (mem_req_valid_nc), + .data_out ({mem_req_addr_nc, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}), + .ready_out (mem_req_ready_nc) ); // build memory tag adding non-cacheable flag if (NC_ENABLE) begin - assign mem_req_tag_in = MEM_TAG_WIDTH'({mem_req_addr_in, 1'b0}); + assign mem_req_tag_nc = MEM_TAG_WIDTH'({mem_req_addr_nc, 1'b0}); end else begin - assign mem_req_tag_in = MEM_TAG_WIDTH'(mem_req_addr_in); + assign mem_req_tag_nc = MEM_TAG_WIDTH'(mem_req_addr_nc); end `ifdef PERF_ENABLE @@ -551,7 +558,7 @@ module VX_cache #( assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); if (CORE_TAG_ID_BITS != 0) begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}}); + assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}); end else begin assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); end diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 4b896521..134b85e6 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -53,7 +53,7 @@ /////////////////////////////////////////////////////////////////////////////// -`define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS) +`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS) `define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS) diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_core_req_bank_sel.v similarity index 72% rename from hw/rtl/cache/VX_cache_core_req_bank_sel.v rename to hw/rtl/cache/VX_core_req_bank_sel.v index 0631c5a8..7da07381 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_core_req_bank_sel.v @@ -1,6 +1,6 @@ `include "VX_cache_define.vh" -module VX_cache_core_req_bank_sel #( +module VX_core_req_bank_sel #( parameter CACHE_ID = 0, // Size of line inside a bank in bytes @@ -24,7 +24,7 @@ module VX_cache_core_req_bank_sel #( input wire reset, `ifdef PERF_ENABLE - output wire [`PERF_CTR_BITS-1:0] bank_stalls, + output wire [`PERF_CTR_BITS-1:0] bank_stalls, `endif input wire [NUM_REQS-1:0] core_req_valid, @@ -46,7 +46,8 @@ module VX_cache_core_req_bank_sel #( input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready ); `UNUSED_PARAM (CACHE_ID) - `STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks")); + `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) + `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -72,21 +73,19 @@ module VX_cache_core_req_bank_sel #( end end + reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; + reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; + reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; + reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; + reg [NUM_REQS-1:0] core_req_ready_r; + if (NUM_REQS > 1) begin - reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid_r; - reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r; - reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; - reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; - reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; - reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; - reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; - reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; - - reg [NUM_REQS-1:0] core_req_ready_r; - - if (NUM_PORTS > 1) begin - + if (NUM_PORTS > 1) begin reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_line_addr_r; wire [NUM_REQS-1:0] core_req_line_match; @@ -201,7 +200,7 @@ module VX_cache_core_req_bank_sel #( end end - end else begin + end else begin always @(*) begin per_bank_core_req_valid_r = 0; @@ -227,14 +226,28 @@ module VX_cache_core_req_bank_sel #( end end - if (SHARED_BANK_READY == 0) begin - always @(*) begin - core_req_ready_r = 0; - for (integer j = 0; j < NUM_BANKS; ++j) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_ready_r[i] = per_bank_core_req_ready[j]; - break; + if (NUM_BANKS > 1) begin + if (SHARED_BANK_READY == 0) begin + always @(*) begin + core_req_ready_r = 0; + for (integer j = 0; j < NUM_BANKS; ++j) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin + core_req_ready_r[i] = per_bank_core_req_ready[j]; + break; + end + end + end + end + end else begin + always @(*) begin + core_req_ready_r = 0; + for (integer j = 0; j < NUM_BANKS; ++j) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin + core_req_ready_r[i] = per_bank_core_req_ready; + break; + end end end end @@ -242,42 +255,64 @@ module VX_cache_core_req_bank_sel #( end else begin always @(*) begin core_req_ready_r = 0; - for (integer j = 0; j < NUM_BANKS; ++j) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_ready_r[i] = per_bank_core_req_ready; - break; - end + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid[i]) begin + core_req_ready_r[i] = per_bank_core_req_ready; + break; end end end end - end + end - assign per_bank_core_req_valid = per_bank_core_req_valid_r; - assign per_bank_core_req_rw = per_bank_core_req_rw_r; - assign per_bank_core_req_addr = per_bank_core_req_addr_r; - assign per_bank_core_req_wsel = per_bank_core_req_wsel_r; - assign per_bank_core_req_byteen = per_bank_core_req_byteen_r; - assign per_bank_core_req_data = per_bank_core_req_data_r; - assign per_bank_core_req_tag = per_bank_core_req_tag_r; - assign per_bank_core_req_tid = per_bank_core_req_tid_r; - assign core_req_ready = core_req_ready_r; - end else begin - `UNUSED_VAR (core_req_bid) + if (NUM_BANKS > 1) begin + always @(*) begin + per_bank_core_req_valid_r = 0; + per_bank_core_req_rw_r = 'x; + per_bank_core_req_addr_r = 'x; + per_bank_core_req_wsel_r = 'x; + per_bank_core_req_byteen_r= 'x; + per_bank_core_req_data_r = 'x; + per_bank_core_req_tag_r = 'x; + per_bank_core_req_tid_r = 'x; + per_bank_core_req_valid_r[core_req_bid[0]] = core_req_valid; + per_bank_core_req_rw_r[core_req_bid[0]] = core_req_rw; + per_bank_core_req_addr_r[core_req_bid[0]] = core_req_line_addr; + per_bank_core_req_wsel_r[core_req_bid[0]] = core_req_wsel; + per_bank_core_req_byteen_r[core_req_bid[0]] = core_req_byteen; + per_bank_core_req_data_r[core_req_bid[0]] = core_req_data; + per_bank_core_req_tag_r[core_req_bid[0]] = core_req_tag; + per_bank_core_req_tid_r[core_req_bid[0]] = 0; + core_req_ready_r = per_bank_core_req_ready[core_req_bid[0]]; + end + end else begin + `UNUSED_VAR (core_req_bid) + always @(*) begin + per_bank_core_req_valid_r = core_req_valid; + per_bank_core_req_rw_r = core_req_rw; + per_bank_core_req_addr_r = core_req_line_addr; + per_bank_core_req_wsel_r = core_req_wsel; + per_bank_core_req_byteen_r = core_req_byteen; + per_bank_core_req_data_r = core_req_data; + per_bank_core_req_tag_r = core_req_tag; + per_bank_core_req_tid_r = 0; + core_req_ready_r = per_bank_core_req_ready; + end + end - assign per_bank_core_req_valid = core_req_valid; - assign per_bank_core_req_rw = core_req_rw; - assign per_bank_core_req_addr = core_req_line_addr; - assign per_bank_core_req_wsel = core_req_wsel; - assign per_bank_core_req_byteen = core_req_byteen; - assign per_bank_core_req_data = core_req_data; - assign per_bank_core_req_tag = core_req_tag; - assign per_bank_core_req_tid = 0; - assign core_req_ready = per_bank_core_req_ready; - end + end + + assign per_bank_core_req_valid = per_bank_core_req_valid_r; + assign per_bank_core_req_rw = per_bank_core_req_rw_r; + assign per_bank_core_req_addr = per_bank_core_req_addr_r; + assign per_bank_core_req_wsel = per_bank_core_req_wsel_r; + assign per_bank_core_req_byteen = per_bank_core_req_byteen_r; + assign per_bank_core_req_data = per_bank_core_req_data_r; + assign per_bank_core_req_tag = per_bank_core_req_tag_r; + assign per_bank_core_req_tid = per_bank_core_req_tid_r; + assign core_req_ready = core_req_ready_r; `ifdef PERF_ENABLE reg [NUM_REQS-1:0] core_req_sel_r; diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_core_rsp_merge.v similarity index 84% rename from hw/rtl/cache/VX_cache_core_rsp_merge.v rename to hw/rtl/cache/VX_core_rsp_merge.v index 71a8b85e..14823b0d 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_core_rsp_merge.v @@ -1,6 +1,6 @@ `include "VX_cache_define.vh" -module VX_cache_core_rsp_merge #( +module VX_core_rsp_merge #( parameter CACHE_ID = 0, // Number of Word requests per cycle @@ -28,10 +28,11 @@ module VX_cache_core_rsp_merge #( output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Response - output wire [NUM_REQS-1:0] core_rsp_valid, - output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, + output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid, + output wire [NUM_REQS-1:0] core_rsp_tmask, + output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, - input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready + input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready ); `UNUSED_PARAM (CACHE_ID) @@ -100,9 +101,6 @@ module VX_cache_core_rsp_merge #( end - wire core_rsp_valid_out; - wire [NUM_REQS-1:0] core_rsp_valid_out_mask; - wire core_rsp_valid_any = (| per_bank_core_rsp_valid); VX_skid_buffer #( @@ -113,13 +111,11 @@ module VX_cache_core_rsp_merge #( .valid_in (core_rsp_valid_any), .data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}), .ready_in (core_rsp_ready_unqual), - .valid_out (core_rsp_valid_out), - .data_out ({core_rsp_valid_out_mask, core_rsp_tag, core_rsp_data}), + .valid_out (core_rsp_valid), + .data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}), .ready_out (core_rsp_ready) ); - assign core_rsp_valid = {NUM_REQS{core_rsp_valid_out}} & core_rsp_valid_out_mask; - end else begin `UNUSED_VAR (per_bank_core_rsp_pmask) @@ -167,6 +163,8 @@ module VX_cache_core_rsp_merge #( ); end + assign core_rsp_tmask = core_rsp_valid; + end for (genvar i = 0; i < NUM_BANKS; i++) begin @@ -181,38 +179,48 @@ module VX_cache_core_rsp_merge #( if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] core_rsp_valid_unqual; - reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; + reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; if (CORE_TAG_ID_BITS != 0) begin + reg [NUM_REQS-1:0] core_rsp_tmask_unqual; + always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_tag_unqual = per_bank_core_rsp_tag; - core_rsp_data_unqual = 'x; - core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; - core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; + core_rsp_tmask_unqual = 0; + core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; + + core_rsp_tag_unqual = per_bank_core_rsp_tag; + + core_rsp_data_unqual = 'x; + core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; end + assign core_rsp_valid = per_bank_core_rsp_valid; + assign core_rsp_tmask = core_rsp_tmask_unqual; assign per_bank_core_rsp_ready = core_rsp_ready; end else begin + reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual; + always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_tag_unqual = 'x; - core_rsp_data_unqual = 'x; core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; - core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag; - core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; + + core_rsp_tag_unqual = 'x; + core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag; + + core_rsp_data_unqual = 'x; + core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; end + assign core_rsp_valid = core_rsp_valid_unqual; + assign core_rsp_tmask = core_rsp_valid_unqual; assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid]; end - assign core_rsp_valid = core_rsp_valid_unqual; assign core_rsp_tag = core_rsp_tag_unqual; assign core_rsp_data = core_rsp_data_unqual; @@ -220,6 +228,7 @@ module VX_cache_core_rsp_merge #( `UNUSED_VAR(per_bank_core_rsp_tid) assign core_rsp_valid = per_bank_core_rsp_valid; + assign core_rsp_tmask = per_bank_core_rsp_valid; assign core_rsp_tag = per_bank_core_rsp_tag; assign core_rsp_data = per_bank_core_rsp_data; assign per_bank_core_rsp_ready = core_rsp_ready; diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index 390ce32e..2ab36f0e 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -38,13 +38,15 @@ module VX_nc_bypass #( input wire [NUM_REQS-1:0] core_req_ready_out, // Core response in - input wire [NUM_REQS-1:0] core_rsp_valid_in, + input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in, + input wire [NUM_REQS-1:0] core_rsp_tmask_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in, input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in, output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in, // Core response out - output wire [NUM_REQS-1:0] core_rsp_valid_out, + output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out, + output wire [NUM_REQS-1:0] core_rsp_tmask_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out, output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out, input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out, @@ -209,39 +211,68 @@ module VX_nc_bypass #( wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; - if (NUM_REQS > 1) begin + if (NUM_RSP_TAGS > 1) begin wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; - reg [NUM_REQS-1:0] core_rsp_valid_in_r; + reg [NUM_REQS-1:0] rsp_nc_valid_r; always @(*) begin - core_rsp_valid_in_r = 0; - core_rsp_valid_in_r[rsp_tid] = 1; + rsp_nc_valid_r = 0; + rsp_nc_valid_r[rsp_tid] = is_mem_rsp_nc; end - assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in; - assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out; - end else begin - assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in; - assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out; - end - - if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_data_out[i] = is_mem_rsp_nc ? - mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i]; - end - end - for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin - assign core_rsp_tag_out[i] = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i]; + assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r; + assign core_rsp_tmask_out = core_rsp_tmask_in; + assign core_rsp_ready_in = core_rsp_ready_out & ~rsp_nc_valid_r; + + if (D != 0) begin + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = rsp_nc_valid_r[i] ? + mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i]; + end + end else begin + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = rsp_nc_valid_r[i] ? mem_rsp_data_in : core_rsp_data_in[i]; + end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_tag_out[i] = rsp_nc_valid_r[i] ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i]; + end + end else begin + assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc; + assign core_rsp_tag_out = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in; + assign core_rsp_ready_in = core_rsp_ready_out && ~is_mem_rsp_nc; + + if (NUM_REQS > 1) begin + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + reg [NUM_REQS-1:0] core_rsp_tmask_in_r; + always @(*) begin + core_rsp_tmask_in_r = 0; + core_rsp_tmask_in_r[rsp_tid] = 1; + end + assign core_rsp_tmask_out = is_mem_rsp_nc ? core_rsp_tmask_in_r : core_rsp_tmask_in; + end else begin + assign core_rsp_tmask_out = core_rsp_valid_out; + end + + if (D != 0) begin + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = is_mem_rsp_nc ? + mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i]; + end + end else begin + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i]; + end + end end // memory response handling assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT]; + assign mem_rsp_data_out = mem_rsp_data_in; + assign mem_rsp_tag_out = mem_rsp_tag_in; if (NUM_RSP_TAGS > 1) begin wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; @@ -250,7 +281,4 @@ module VX_nc_bypass #( assign mem_rsp_ready_in = is_mem_rsp_nc ? core_rsp_ready_out : mem_rsp_ready_out; end - assign mem_rsp_data_out = mem_rsp_data_in; - assign mem_rsp_tag_out = mem_rsp_tag_in; - endmodule diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 44b2f42b..c6316db2 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -42,7 +42,8 @@ module VX_shared_mem #( output wire [NUM_REQS-1:0] core_req_ready, // Core response - output wire [NUM_REQS-1:0] core_rsp_valid, + output wire core_rsp_valid, + output wire [NUM_REQS-1:0] core_rsp_tmask, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, input wire core_rsp_ready @@ -63,7 +64,7 @@ module VX_shared_mem #( wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual; wire per_bank_core_req_ready_unqual; - VX_cache_core_req_bank_sel #( + VX_core_req_bank_sel #( .CACHE_ID (CACHE_ID), .CACHE_LINE_SIZE (WORD_SIZE), .NUM_BANKS (NUM_BANKS), @@ -79,13 +80,13 @@ module VX_shared_mem #( `ifdef PERF_ENABLE .bank_stalls(perf_cache_if.bank_stalls), `endif - .core_req_valid (core_req_valid), - .core_req_rw (core_req_rw), - .core_req_addr (core_req_addr), - .core_req_byteen(core_req_byteen), - .core_req_data (core_req_data), - .core_req_tag (core_req_tag), - .core_req_ready (core_req_ready), + .core_req_valid (core_req_valid), + .core_req_rw (core_req_rw), + .core_req_addr (core_req_addr), + .core_req_byteen (core_req_byteen), + .core_req_data (core_req_data), + .core_req_tag (core_req_tag), + .core_req_ready (core_req_ready), .per_bank_core_req_valid (per_bank_core_req_valid_unqual), .per_bank_core_req_tid (per_bank_core_req_tid_unqual), .per_bank_core_req_rw (per_bank_core_req_rw_unqual), @@ -233,9 +234,6 @@ module VX_shared_mem #( end end - wire [NUM_REQS-1:0] core_rsp_valids_out; - wire core_rsp_valid_out; - assign crsq_in_valid = ~creq_empty && core_req_has_read; VX_skid_buffer #( @@ -246,13 +244,11 @@ module VX_shared_mem #( .valid_in (crsq_in_valid), .data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}), .ready_in (crsq_in_ready), - .valid_out (core_rsp_valid_out), - .data_out ({core_rsp_valids_out, core_rsp_data, core_rsp_tag}), + .valid_out (core_rsp_valid), + .data_out ({core_rsp_tmask, core_rsp_data, core_rsp_tag}), .ready_out (core_rsp_ready) ); - assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}}; - `ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1; @@ -342,7 +338,7 @@ module VX_shared_mem #( assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); if (CORE_TAG_ID_BITS != 0) begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}}); + assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}); end else begin assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); end diff --git a/hw/rtl/interfaces/VX_dcache_core_rsp_if.v b/hw/rtl/interfaces/VX_dcache_core_rsp_if.v index c2196b77..8ce59713 100644 --- a/hw/rtl/interfaces/VX_dcache_core_rsp_if.v +++ b/hw/rtl/interfaces/VX_dcache_core_rsp_if.v @@ -9,10 +9,11 @@ interface VX_dcache_core_rsp_if #( parameter CORE_TAG_WIDTH = 1 ) (); - wire [NUM_REQS-1:0] valid; + wire valid; + wire [NUM_REQS-1:0] tmask; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data; - wire [CORE_TAG_WIDTH-1:0] tag; - wire ready; + wire [CORE_TAG_WIDTH-1:0] tag; + wire ready; endinterface diff --git a/hw/syn/quartus/unittest/Makefile b/hw/syn/quartus/unittest/Makefile index 43d17d0f..81219d6f 100644 --- a/hw/syn/quartus/unittest/Makefile +++ b/hw/syn/quartus/unittest/Makefile @@ -1,6 +1,6 @@ PROJECT = Unittest -TOP_LEVEL_ENTITY = VX_cache_core_req_bank_sel -SRC_FILE = VX_cache_core_req_bank_sel.v +TOP_LEVEL_ENTITY = VX_core_req_bank_sel +SRC_FILE = VX_core_req_bank_sel.v RTL_DIR = ../../../../rtl FAMILY = "Arria 10"