From 41069ba18820b96ec7d2913393b8a2825c954a28 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 6 Jun 2021 20:54:36 -0700 Subject: [PATCH] non-cacheable memory address fixes --- hw/rtl/VX_databus_arb.v | 127 ---------------------------------------- hw/rtl/VX_define.vh | 21 +++++-- hw/rtl/VX_lsu_unit.v | 17 +++--- hw/rtl/VX_mem_unit.v | 124 +++++++++++++++++++++++++-------------- hw/rtl/VX_smem_arb.v | 96 ++++++++++++++++++++++++++++++ hw/rtl/cache/VX_cache.v | 10 +++- hw/scripts/scope.json | 4 +- 7 files changed, 207 insertions(+), 192 deletions(-) delete mode 100644 hw/rtl/VX_databus_arb.v create mode 100644 hw/rtl/VX_smem_arb.v diff --git a/hw/rtl/VX_databus_arb.v b/hw/rtl/VX_databus_arb.v deleted file mode 100644 index 47b6c6c1..00000000 --- a/hw/rtl/VX_databus_arb.v +++ /dev/null @@ -1,127 +0,0 @@ -`include "VX_define.vh" - -module VX_databus_arb ( - input wire clk, - input wire reset, - - // input request - VX_dcache_core_req_if core_req_if, - - // output requests - VX_dcache_core_req_if cache_req_if, - VX_dcache_core_req_if smem_req_if, - - // input responses - VX_dcache_core_rsp_if cache_rsp_if, - VX_dcache_core_rsp_if smem_rsp_if, - - // output response - VX_dcache_core_rsp_if core_rsp_if -); - localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE); - localparam REQ_ADDRW = 32 - REQ_ASHIFT; - localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; - localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; - - // - // handle requests - // - - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - if (`SM_ENABLE) begin - wire cache_req_valid_out; - wire cache_req_ready_out; - wire is_smem_addr_out; - - wire is_smem_addr_in = core_req_if.tag[i][1]; - - VX_skid_buffer #( - .DATAW (REQ_DATAW) - ) out_buffer ( - .clk (clk), - .reset (reset), - .valid_in (core_req_if.valid[i]), - .data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], 
core_req_if.tag[i]}), - .ready_in (core_req_if.ready[i]), - .valid_out (cache_req_valid_out), - .data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}), - .ready_out (cache_req_ready_out) - ); - - assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out; - assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out; - assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i]; - - assign smem_req_if.addr[i] = cache_req_if.addr[i]; - assign smem_req_if.rw[i] = cache_req_if.rw[i]; - assign smem_req_if.byteen[i] = cache_req_if.byteen[i]; - assign smem_req_if.data[i] = cache_req_if.data[i]; - assign smem_req_if.tag[i] = cache_req_if.tag[i]; - - end else begin - - VX_skid_buffer #( - .DATAW (REQ_DATAW) - ) out_buffer ( - .clk (clk), - .reset (reset), - .valid_in (core_req_if.valid[i]), - .data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}), - .ready_in (core_req_if.ready[i]), - .valid_out (cache_req_if.valid[i]), - .data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}), - .ready_out (cache_req_if.ready[i]) - ); - - end - end - - // - // handle responses - // - - if (`SM_ENABLE ) begin - - wire [1:0][RSP_DATAW-1:0] rsp_data_in; - wire [1:0] rsp_valid_in; - wire [1:0] rsp_ready_in; - - wire core_rsp_valid; - wire [`NUM_THREADS-1:0] core_rsp_valid_tmask; - - assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag}; - assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag}; - - assign rsp_valid_in[0] = (| cache_rsp_if.valid); - assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; - - VX_stream_arbiter #( - .NUM_REQS (2), - .DATAW (RSP_DATAW), - .BUFFERED (1) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_valid_in), - .data_in 
(rsp_data_in), - .ready_in (rsp_ready_in), - .valid_out (core_rsp_valid), - .data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}), - .ready_out (core_rsp_if.ready) - ); - - assign cache_rsp_if.ready = rsp_ready_in[0]; - assign smem_rsp_if.ready = rsp_ready_in[1]; - - assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask; - - end else begin - - assign core_rsp_if.valid = cache_rsp_if.valid; - assign core_rsp_if.tag = cache_rsp_if.tag; - assign core_rsp_if.data = cache_rsp_if.data; - assign cache_rsp_if.ready = core_rsp_if.ready; - - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 3f80fcc1..2e7004db 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -237,8 +237,8 @@ `define DBG_CACHE_REQ_MDATAW 0 `endif -// Shared memory and non-cacheable flags -`define SM_NC_BITS 2 +// non-cacheable address bit +`define NC_ADDR_BITS 1 ////////////////////////// Icache Configurable Knobs ////////////////////////// @@ -269,9 +269,15 @@ // Memory request data bits `define IMEM_LINE_WIDTH (`ICACHE_LINE_SIZE * 8) +// Memory request address bits +`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE)) + // Memory byte enable bits `define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE +// Memory request tag bits +`define IMEM_TAG_WIDTH `IMEM_ADDR_WIDTH + ////////////////////////// Dcache Configurable Knobs ////////////////////////// // Cache ID @@ -283,9 +289,12 @@ // Word size in bytes `define DWORD_SIZE 4 +// Core request address bits +`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE)) + // TAG sharing enable `define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) -`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `SM_NC_BITS) +`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) // Input request tag bits `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) @@ -305,7 +314,7 @@ // Memory request tag bits `define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE) 
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH) -`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `SM_NC_BITS), `_DNC_MEM_TAG_WIDTH) +`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH) ////////////////////////// SM Configurable Knobs ////////////////////////////// @@ -350,7 +359,7 @@ // Memory request tag bits `define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE) `define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH) -`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `SM_NC_BITS), `_L2NC_MEM_TAG_WIDTH) +`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH) `define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS))) ////////////////////////// L3cache Configurable Knobs ///////////////////////// @@ -382,7 +391,7 @@ // Memory request tag bits `define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE) `define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH) -`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `SM_NC_BITS), `_L3NC_MEM_TAG_WIDTH) +`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH) `define L3MEM_TAG_WIDTH (`L3_ENABLE ? 
`_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS))) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index f1f99162..e5dffbfe 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -25,7 +25,7 @@ module VX_lsu_unit #( localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE); localparam REQ_ADDRW = 32 - REQ_ASHIFT; - localparam ADDR_TYPEW = 1 + `SM_ENABLE; + localparam ADDR_TYPEW = `NC_ADDR_BITS + `SM_ENABLE; `STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) `STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) @@ -60,20 +60,17 @@ module VX_lsu_unit #( end wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches); - wire [`NUM_THREADS-1:0] is_addr_sm, is_addr_nc; - for (genvar i = 0; i < `NUM_THREADS; i++) begin - // is shared memory address - assign is_addr_sm[i] = (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT)) - & (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT)); - // is non-cacheable address - assign is_addr_nc[i] = (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT)); + wire is_addr_nc = (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT)); if (`SM_ENABLE) begin - assign lsu_addr_type[i] = {is_addr_sm[i], is_addr_nc[i]}; + // is shared memory address + wire is_addr_sm = (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT)) + & (word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT)); + assign lsu_addr_type[i] = {is_addr_nc, is_addr_sm}; end else begin - assign lsu_addr_type[i] = {1'b0, is_addr_nc[i]}; + assign lsu_addr_type[i] = is_addr_nc; end end diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 
f95f8547..5c98bc1a 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -29,53 +29,39 @@ module VX_mem_unit # ( VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if(); `endif + VX_cache_mem_req_if #( + .MEM_LINE_WIDTH (`IMEM_LINE_WIDTH), + .MEM_ADDR_WIDTH (`IMEM_ADDR_WIDTH), + .MEM_TAG_WIDTH (`IMEM_TAG_WIDTH) + ) icache_mem_req_if(); + + VX_cache_mem_rsp_if #( + .MEM_LINE_WIDTH (`IMEM_LINE_WIDTH), + .MEM_TAG_WIDTH (`IMEM_TAG_WIDTH) + ) icache_mem_rsp_if(); + VX_cache_mem_req_if #( .MEM_LINE_WIDTH (`DMEM_LINE_WIDTH), .MEM_ADDR_WIDTH (`DMEM_ADDR_WIDTH), .MEM_TAG_WIDTH (`DMEM_TAG_WIDTH) - ) dcache_mem_req_if(), icache_mem_req_if(); + ) dcache_mem_req_if(); VX_cache_mem_rsp_if #( .MEM_LINE_WIDTH (`DMEM_LINE_WIDTH), .MEM_TAG_WIDTH (`DMEM_TAG_WIDTH) - ) dcache_mem_rsp_if(), icache_mem_rsp_if(); + ) dcache_mem_rsp_if(); VX_dcache_core_req_if #( .NUM_REQS (`DNUM_REQS), .WORD_SIZE (`DWORD_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH) + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) ) dcache_req_if(); VX_dcache_core_rsp_if #( .NUM_REQS (`DNUM_REQS), .WORD_SIZE (`DWORD_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH) - ) dcache_rsp_if(); - - VX_dcache_core_req_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH) - ) smem_req_if(); - - VX_dcache_core_rsp_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH) - ) smem_rsp_if(); - - VX_databus_arb databus_arb ( - .clk (clk), - .reset (reset), - - .core_req_if (dcache_core_req_if), - .cache_req_if (dcache_req_if), - .smem_req_if (smem_req_if), - - .cache_rsp_if (dcache_rsp_if), - .smem_rsp_if (smem_rsp_if), - .core_rsp_if (dcache_core_rsp_if) - ); + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + ) dcache_rsp_if(); wire icache_reset, dcache_reset; @@ -101,7 +87,7 @@ module VX_mem_unit # ( .WRITE_ENABLE (0), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), - .MEM_TAG_WIDTH (`DMEM_TAG_WIDTH) + 
.MEM_TAG_WIDTH (`IMEM_TAG_WIDTH) ) icache ( `SCOPE_BIND_VX_mem_unit_icache @@ -156,8 +142,8 @@ module VX_mem_unit # ( .MRSQ_SIZE (`DMRSQ_SIZE), .MREQ_SIZE (`DMREQ_SIZE), .WRITE_ENABLE (1), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE), + .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE), .MEM_TAG_WIDTH (`DMEM_TAG_WIDTH), .NC_ENABLE (1) ) dcache ( @@ -201,7 +187,31 @@ module VX_mem_unit # ( .mem_rsp_ready (dcache_mem_rsp_if.ready) ); - if (`SM_ENABLE) begin + if (`SM_ENABLE) begin + VX_dcache_core_req_if #( + .NUM_REQS (`DNUM_REQS), + .WORD_SIZE (`DWORD_SIZE), + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + ) smem_req_if(); + + VX_dcache_core_rsp_if #( + .NUM_REQS (`DNUM_REQS), + .WORD_SIZE (`DWORD_SIZE), + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + ) smem_rsp_if(); + + VX_smem_arb smem_arb ( + .clk (clk), + .reset (reset), + + .core_req_if (dcache_core_req_if), + .cache_req_if (dcache_req_if), + .smem_req_if (smem_req_if), + + .cache_rsp_if (dcache_rsp_if), + .smem_rsp_if (smem_rsp_if), + .core_rsp_if (dcache_core_rsp_if) + ); wire scache_reset; @@ -218,8 +228,8 @@ module VX_mem_unit # ( .WORD_SIZE (`SWORD_SIZE), .NUM_REQS (`SNUM_REQS), .CREQ_SIZE (`SCREQ_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), + .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE), + .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE), .BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET) ) smem ( .clk (clk), @@ -243,10 +253,36 @@ module VX_mem_unit # ( .core_rsp_data (smem_rsp_if.data), .core_rsp_tag (smem_rsp_if.tag), .core_rsp_ready (smem_rsp_if.ready) - ); - + ); + end else begin + // core to D-cache request + for (genvar i = 0; i < `DNUM_REQS; ++i) begin + VX_skid_buffer #( + .DATAW (`DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH) + ) core_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (dcache_core_req_if.valid[i]), + .data_in 
({dcache_core_req_if.addr[i], dcache_core_req_if.rw[i], dcache_core_req_if.byteen[i], dcache_core_req_if.data[i], dcache_core_req_if.tag[i]}), + .ready_in (dcache_core_req_if.ready[i]), + .valid_out (dcache_req_if.valid[i]), + .data_out ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}), + .ready_out (dcache_req_if.ready[i]) + ); + end + + // D-cache to core response + assign dcache_core_rsp_if.valid = dcache_rsp_if.valid; + assign dcache_core_rsp_if.tag = dcache_rsp_if.tag; + assign dcache_core_rsp_if.data = dcache_rsp_if.data; + assign dcache_rsp_if.ready = dcache_core_rsp_if.ready; end + wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag); + wire [`DMEM_TAG_WIDTH-1:0] icache_mem_rsp_tag; + assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`IMEM_TAG_WIDTH-1:0]; + `UNUSED_VAR (icache_mem_rsp_tag) + VX_mem_arb #( .NUM_REQS (2), .DATA_WIDTH (`DMEM_LINE_WIDTH), @@ -265,7 +301,7 @@ module VX_mem_unit # ( .req_byteen_in ({dcache_mem_req_if.byteen, icache_mem_req_if.byteen}), .req_addr_in ({dcache_mem_req_if.addr, icache_mem_req_if.addr}), .req_data_in ({dcache_mem_req_if.data, icache_mem_req_if.data}), - .req_tag_in ({dcache_mem_req_if.tag, icache_mem_req_if.tag}), + .req_tag_in ({dcache_mem_req_if.tag, icache_mem_req_tag}), .req_ready_in ({dcache_mem_req_if.ready, icache_mem_req_if.ready}), // Memory request @@ -278,10 +314,10 @@ module VX_mem_unit # ( .req_ready_out (mem_req_if.ready), // Source response - .rsp_valid_out ({dcache_mem_rsp_if.valid, icache_mem_rsp_if.valid}), - .rsp_data_out ({dcache_mem_rsp_if.data, icache_mem_rsp_if.data}), - .rsp_tag_out ({dcache_mem_rsp_if.tag, icache_mem_rsp_if.tag}), - .rsp_ready_out ({dcache_mem_rsp_if.ready, icache_mem_rsp_if.ready}), + .rsp_valid_out ({dcache_mem_rsp_if.valid, icache_mem_rsp_if.valid}), + .rsp_data_out ({dcache_mem_rsp_if.data, icache_mem_rsp_if.data}), + .rsp_tag_out ({dcache_mem_rsp_if.tag, 
icache_mem_rsp_tag}), + .rsp_ready_out ({dcache_mem_rsp_if.ready, icache_mem_rsp_if.ready}), // Memory response .rsp_valid_in (mem_rsp_if.valid), @@ -324,7 +360,7 @@ end end else begin perf_mem_lat_per_cycle <= perf_mem_lat_per_cycle + `PERF_CTR_BITS'($signed(2'((mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready) && !(mem_rsp_if.valid && mem_rsp_if.ready)) - - 2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready)))); + 2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready)))); end end diff --git a/hw/rtl/VX_smem_arb.v b/hw/rtl/VX_smem_arb.v new file mode 100644 index 00000000..13cb5307 --- /dev/null +++ b/hw/rtl/VX_smem_arb.v @@ -0,0 +1,96 @@ +`include "VX_define.vh" + +module VX_smem_arb ( + input wire clk, + input wire reset, + + // input request + VX_dcache_core_req_if core_req_if, + + // output requests + VX_dcache_core_req_if cache_req_if, + VX_dcache_core_req_if smem_req_if, + + // input responses + VX_dcache_core_rsp_if cache_rsp_if, + VX_dcache_core_rsp_if smem_rsp_if, + + // output response + VX_dcache_core_rsp_if core_rsp_if +); + localparam REQ_DATAW = 1 + `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH - 1; + localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; + + // + // handle requests + // + + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + wire cache_req_valid_out; + wire cache_req_ready_out; + wire is_smem_addr_out; + + wire is_smem_addr_in = core_req_if.tag[i][0]; + + VX_skid_buffer #( + .DATAW (REQ_DATAW) + ) out_buffer ( + .clk (clk), + .reset (reset), + .valid_in (core_req_if.valid[i]), + .data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}), + .ready_in (core_req_if.ready[i]), + .valid_out (cache_req_valid_out), + .data_out ({is_smem_addr_out, cache_req_if.addr[i], 
cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}), + .ready_out (cache_req_ready_out) + ); + + assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out; + assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out; + assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i]; + + assign smem_req_if.addr[i] = cache_req_if.addr[i]; + assign smem_req_if.rw[i] = cache_req_if.rw[i]; + assign smem_req_if.byteen[i] = cache_req_if.byteen[i]; + assign smem_req_if.data[i] = cache_req_if.data[i]; + assign smem_req_if.tag[i] = cache_req_if.tag[i]; + end + + // + // handle responses + // + + wire [1:0][RSP_DATAW-1:0] rsp_data_in; + wire [1:0] rsp_valid_in; + wire [1:0] rsp_ready_in; + + wire core_rsp_valid; + wire [`NUM_THREADS-1:0] core_rsp_valid_tmask; + + assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}; + assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}}; + + assign rsp_valid_in[0] = (| cache_rsp_if.valid); + assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; + + VX_stream_arbiter #( + .NUM_REQS (2), + .DATAW (RSP_DATAW), + .BUFFERED (1) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (rsp_valid_in), + .data_in (rsp_data_in), + .ready_in (rsp_ready_in), + .valid_out (core_rsp_valid), + .data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}), + .ready_out (core_rsp_if.ready) + ); + + assign cache_rsp_if.ready = rsp_ready_in[0]; + assign smem_rsp_if.ready = rsp_ready_in[1]; + + assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask; + +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index b2ed57d6..5769c404 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -243,7 +243,7 @@ module VX_cache #( assign mem_rsp_ready_out = !mrsq_full; // trim out shared memory and non-cacheable 
flags - assign mem_rsp_tag_out_a = mem_rsp_tag_out[2 +: `MEM_ADDR_WIDTH]; + assign mem_rsp_tag_out_a = mem_rsp_tag_out[NC_ENABLE +: `MEM_ADDR_WIDTH]; VX_fifo_queue #( .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), @@ -534,8 +534,12 @@ module VX_cache #( .ready_out (mem_req_ready_in) ); - // build memory tag adding shared memory and non-cacheable flags - assign mem_req_tag_in = MEM_TAG_WIDTH'({mem_req_addr_in, 1'b0, 1'b0}); + // build memory tag adding non-cacheable flag + if (NC_ENABLE) begin + assign mem_req_tag_in = MEM_TAG_WIDTH'({mem_req_addr_in, 1'b0}); + end else begin + assign mem_req_tag_in = MEM_TAG_WIDTH'(mem_req_addr_in); + end `ifdef PERF_ENABLE // per cycle: core_reads, core_writes diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index b22c2efa..4ccc6548 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -111,9 +111,9 @@ "!cci_pending_writes_full": 1, "?afu_mem_req_fire": 1, "afu_mem_req_addr": 26, - "afu_mem_req_tag": 30, + "afu_mem_req_tag": 29, "?afu_mem_rsp_fire": 1, - "afu_mem_rsp_tag": 30 + "afu_mem_rsp_tag": 29 }, "afu/vortex": { "!reset": 1,