diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 50d3ac2a..5881b11a 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -543,7 +543,7 @@ assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_ //-- VX_mem_arb #( - .NUM_REQUESTS (2), + .NUM_REQS (2), .DATA_WIDTH ($bits(t_local_mem_data)), .ADDR_WIDTH ($bits(t_local_mem_addr)), .TAG_IN_WIDTH (AVS_REQ_TAGW), @@ -561,12 +561,6 @@ VX_mem_arb #( .req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}), .req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}), - // Source response - .rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}), - .rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}), - .rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}), - .rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}), - // DRAM request .req_valid_out (dram_req_valid), .req_rw_out (dram_req_rw), @@ -575,6 +569,12 @@ VX_mem_arb #( .req_data_out (dram_req_data), .req_tag_out (dram_req_tag), .req_ready_out (dram_req_ready), + + // Source response + .rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}), + .rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}), + .rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}), + .rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}), // DRAM response .rsp_valid_in (dram_rsp_valid), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 80e32d2f..3d2a6a2e 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -202,7 +202,7 @@ module VX_cluster #( end VX_io_arb #( - .NUM_REQUESTS (`NUM_CORES), + .NUM_REQS (`NUM_CORES), .WORD_SIZE (4), .TAG_IN_WIDTH (`DCORE_TAG_WIDTH), .TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH) @@ -219,12 +219,6 @@ module VX_cluster #( .io_req_tag_in (per_core_io_req_tag), .io_req_ready_in (per_core_io_req_ready), - // input responses - .io_rsp_valid_in (per_core_io_rsp_valid), - .io_rsp_data_in (per_core_io_rsp_data), - .io_rsp_tag_in (per_core_io_rsp_tag), - .io_rsp_ready_in (per_core_io_rsp_ready), - // output request .io_req_valid_out (io_req_valid), .io_req_rw_out (io_req_rw), @@ -233,6 +227,12 @@ module VX_cluster #( .io_req_data_out (io_req_data), .io_req_tag_out (io_req_tag), .io_req_ready_out (io_req_ready), + + // input responses + .io_rsp_valid_in (per_core_io_rsp_valid), + .io_rsp_data_in (per_core_io_rsp_data), + .io_rsp_tag_in (per_core_io_rsp_tag), + .io_rsp_ready_in (per_core_io_rsp_ready), // output response .io_rsp_valid_out (io_rsp_valid), @@ -242,7 +242,9 @@ module VX_cluster #( ); VX_csr_io_arb #( - .NUM_REQUESTS (`NUM_CORES) + .NUM_REQS (`NUM_CORES), + .DATA_WIDTH (32), + .ADDR_WIDTH (12) ) csr_io_arb ( .clk (clk), .reset (reset), @@ -380,7 +382,7 @@ module VX_cluster #( VX_snp_forwarder #( .CACHE_ID (`L2CACHE_ID), - .NUM_REQUESTS (`NUM_CORES), + .NUM_REQS (`NUM_CORES), .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH), @@ -418,7 +420,7 @@ module VX_cluster #( .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), .NUM_BANKS (`L2NUM_BANKS), .WORD_SIZE (`L2WORD_SIZE), - .NUM_REQUESTS (`L2NUM_REQUESTS), + .NUM_REQS (`L2NUM_REQUESTS), .CREQ_SIZE (`L2CREQ_SIZE), .MSHR_SIZE (`L2MSHR_SIZE), .DRFQ_SIZE (`L2DRFQ_SIZE), @@ -558,7 +560,7 @@ module VX_cluster #( if (`NUM_CORES > 1) begin VX_snp_forwarder #( .CACHE_ID (`L2CACHE_ID), - .NUM_REQUESTS (`NUM_CORES), + .NUM_REQS (`NUM_CORES), .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH), @@ -602,7 +604,7 @@ module VX_cluster #( end VX_mem_arb #( - .NUM_REQUESTS (`L2NUM_REQUESTS), + .NUM_REQS (`L2NUM_REQUESTS), .DATA_WIDTH (`L2DRAM_LINE_WIDTH), .TAG_IN_WIDTH (`DDRAM_TAG_WIDTH), .TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH) @@ -619,12 +621,6 @@ module VX_cluster #( .req_tag_in (core_dram_req_tag), .req_ready_in (core_dram_req_ready), - // Core response - .rsp_valid_out (core_dram_rsp_valid), - .rsp_data_out (core_dram_rsp_data), - .rsp_tag_out (core_dram_rsp_tag), - .rsp_ready_out (core_dram_rsp_ready), - // DRAM request .req_valid_out (dram_req_valid), .req_rw_out (dram_req_rw), @@ -633,6 +629,12 @@ module VX_cluster #( .req_data_out (dram_req_data), .req_tag_out (dram_req_tag), .req_ready_out (dram_req_ready), + + // Core response + .rsp_valid_out (core_dram_rsp_valid), + .rsp_data_out (core_dram_rsp_data), + .rsp_tag_out (core_dram_rsp_tag), + .rsp_ready_out (core_dram_rsp_ready), // DRAM response .rsp_valid_in (dram_rsp_valid), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 38efbafd..92b570e1 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -106,14 +106,14 @@ module VX_core #( assign D_dram_rsp_ready = dcache_dram_rsp_if.ready; VX_cache_core_req_if #( - .NUM_REQUESTS(`DNUM_REQUESTS), + .NUM_REQS(`DNUM_REQUESTS), .WORD_SIZE(`DWORD_SIZE), .CORE_TAG_WIDTH(`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS) ) core_dcache_req_if(),arb_dcache_req_if(), arb_io_req_if(); VX_cache_core_rsp_if #( - .NUM_REQUESTS(`DNUM_REQUESTS), + .NUM_REQS(`DNUM_REQUESTS), .WORD_SIZE(`DWORD_SIZE), .CORE_TAG_WIDTH(`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS) @@ -159,14 +159,14 @@ module VX_core #( assign I_dram_rsp_ready = icache_dram_rsp_if.ready; VX_cache_core_req_if #( - .NUM_REQUESTS(`INUM_REQUESTS), + .NUM_REQS(`INUM_REQUESTS), .WORD_SIZE(`IWORD_SIZE), .CORE_TAG_WIDTH(`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS) ) core_icache_req_if(); VX_cache_core_rsp_if #( - .NUM_REQUESTS(`INUM_REQUESTS), + .NUM_REQS(`INUM_REQUESTS), .WORD_SIZE(`IWORD_SIZE), .CORE_TAG_WIDTH(`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS) @@ -277,7 +277,7 @@ module VX_core #( // select io bus wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR); - wire io_req_select = (| core_dcache_req_if.valid) ? is_io_addr : 0; + wire io_req_select = (| core_dcache_req_if.valid) && is_io_addr; wire io_rsp_select = (| arb_io_rsp_if.valid); VX_dcache_arb dcache_io_arb ( diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index 1dfd7dfc..d2b00e16 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -1,8 +1,8 @@ `include "VX_define.vh" -module VX_csr_arb ( - input wire clk, - input wire reset, +module VX_csr_arb ( + input wire clk, + input wire reset, // bus select input wire select_io_req, @@ -22,8 +22,8 @@ module VX_csr_arb ( VX_commit_if csr_commit_if, VX_csr_io_rsp_if csr_io_rsp_if ); - - VX_csr_io_rsp_if csr_io_rsp_tmp_if(); + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) // requests assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid; @@ -41,8 +41,8 @@ module VX_csr_arb ( assign csr_io_req_if.ready = csr_req_if.ready && select_io_req; // responses - assign csr_io_rsp_tmp_if.valid = csr_rsp_if.valid & select_io_rsp; - assign csr_io_rsp_tmp_if.data = csr_rsp_if.data[0]; + assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp; + assign csr_io_rsp_if.data = csr_rsp_if.data[0]; assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp; assign csr_commit_if.wid = csr_rsp_if.wid; @@ -52,20 +52,6 @@ module VX_csr_arb ( assign csr_commit_if.wb = csr_rsp_if.wb; assign csr_commit_if.data = csr_rsp_if.data; - assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_tmp_if.ready : csr_commit_if.ready; - - // Use skid buffer on CSR IO bus to stop backpressure delay propagation - VX_skid_buffer #( - .DATAW (32) - ) io_skid_buffer ( - .clk (clk), - .reset (reset), - .valid_in (csr_io_rsp_tmp_if.valid), - .ready_in (csr_io_rsp_tmp_if.ready), - .data_in (csr_io_rsp_tmp_if.data), - .data_out (csr_io_rsp_if.data), - .valid_out (csr_io_rsp_if.valid), - .ready_out (csr_io_rsp_if.ready) - ); + assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready; endmodule diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index e8714bf0..6b7d7a5d 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -1,41 +1,45 @@ `include "VX_define.vh" module VX_csr_io_arb #( - parameter NUM_REQUESTS = 1, - parameter REQS_BITS = `LOG2UP(NUM_REQUESTS) + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, + + parameter DATA_SIZE = (DATA_WIDTH / 8), + parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), + parameter REQS_BITS = `LOG2UP(NUM_REQS) ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, - input wire [REQS_BITS-1:0] request_id, + input wire [REQS_BITS-1:0] request_id, // input requests - input wire csr_io_req_valid_in, - input wire [11:0] csr_io_req_addr_in, - input wire csr_io_req_rw_in, - input wire [31:0] csr_io_req_data_in, - output wire csr_io_req_ready_in, + input wire csr_io_req_valid_in, + input wire [ADDR_WIDTH-1:0] csr_io_req_addr_in, + input wire csr_io_req_rw_in, + input wire [DATA_WIDTH-1:0] csr_io_req_data_in, + output wire csr_io_req_ready_in, // output request - output wire [NUM_REQUESTS-1:0] csr_io_req_valid_out, - output wire [NUM_REQUESTS-1:0][11:0] csr_io_req_addr_out, - output wire [NUM_REQUESTS-1:0] csr_io_req_rw_out, - output wire [NUM_REQUESTS-1:0][31:0] csr_io_req_data_out, - input wire [NUM_REQUESTS-1:0] csr_io_req_ready_out, + output wire [NUM_REQS-1:0] csr_io_req_valid_out, + output wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] csr_io_req_addr_out, + output wire [NUM_REQS-1:0] csr_io_req_rw_out, + output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] csr_io_req_data_out, + input wire [NUM_REQS-1:0] csr_io_req_ready_out, // input response - input wire [NUM_REQUESTS-1:0] csr_io_rsp_valid_in, - input wire [NUM_REQUESTS-1:0][31:0] csr_io_rsp_data_in, - output wire [NUM_REQUESTS-1:0] csr_io_rsp_ready_in, + input wire [NUM_REQS-1:0] csr_io_rsp_valid_in, + input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] csr_io_rsp_data_in, + output wire [NUM_REQS-1:0] csr_io_rsp_ready_in, // output response - output wire csr_io_rsp_valid_out, - output wire [31:0] csr_io_rsp_data_out, - input wire csr_io_rsp_ready_out + output wire csr_io_rsp_valid_out, + output wire [DATA_WIDTH-1:0] csr_io_rsp_data_out, + input wire csr_io_rsp_ready_out ); - if (NUM_REQUESTS > 1) begin + if (NUM_REQS > 1) begin - for (genvar i = 0; i < NUM_REQUESTS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i)); assign csr_io_req_addr_out[i] = csr_io_req_addr_in; assign csr_io_req_rw_out[i] = csr_io_req_rw_in; @@ -44,41 +48,6 @@ module VX_csr_io_arb #( assign csr_io_req_ready_in = csr_io_req_ready_out[request_id]; - /////////////////////////////////////////////////////////////////////// - - wire [REQS_BITS-1:0] rsp_idx; - wire [NUM_REQUESTS-1:0] rsp_1hot; - - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .requests (csr_io_rsp_valid_in), - `UNUSED_PIN (grant_valid), - .grant_index (rsp_idx), - .grant_onehot (rsp_1hot) - ); - - wire stall = ~csr_io_rsp_ready_out && csr_io_rsp_valid_out; - - VX_generic_register #( - .N(1 + 32), - .R(1), - .PASSTHRU(NUM_REQUESTS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({csr_io_rsp_valid_in[rsp_idx], csr_io_rsp_data_in[rsp_idx]}), - .out ({csr_io_rsp_valid_out, csr_io_rsp_data_out}) - ); - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign csr_io_rsp_ready_in[i] = rsp_1hot[i] && ~stall; - end - end else begin `UNUSED_VAR (clk) @@ -90,11 +59,24 @@ module VX_csr_io_arb #( assign csr_io_req_rw_out = csr_io_req_rw_in; assign csr_io_req_data_out = csr_io_req_data_in; assign csr_io_req_ready_in = csr_io_req_ready_out; - - assign csr_io_rsp_valid_out = csr_io_rsp_valid_in; - assign csr_io_rsp_data_out = csr_io_rsp_data_in; - assign csr_io_rsp_ready_in = csr_io_rsp_ready_out; end + /////////////////////////////////////////////////////////////////////// + + VX_stream_arbiter #( + .NUM_REQS(NUM_REQS), + .DATAW(DATA_WIDTH), + .BUFFERED(NUM_REQS >= 4) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (csr_io_rsp_valid_in), + .valid_out (csr_io_rsp_valid_out), + .data_in (csr_io_rsp_data_in), + .data_out (csr_io_rsp_data_out), + .ready_in (csr_io_rsp_ready_in), + .ready_out (csr_io_rsp_ready_out) + ); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index ddddb1a9..685ef114 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -14,7 +14,7 @@ `define NB_BITS `LOG2UP(`NUM_BARRIERS) -`define REQS_BITS `LOG2UP(NUM_REQUESTS) +`define REQS_BITS `LOG2UP(NUM_REQS) `ifdef EXT_F_ENABLE `define NUM_REGS 64 diff --git a/hw/rtl/VX_io_arb.v b/hw/rtl/VX_io_arb.v index ab9ff6b6..227194a7 100644 --- a/hw/rtl/VX_io_arb.v +++ b/hw/rtl/VX_io_arb.v @@ -1,93 +1,85 @@ `include "VX_define.vh" module VX_io_arb #( - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, parameter WORD_SIZE = 1, parameter TAG_IN_WIDTH = 1, parameter TAG_OUT_WIDTH = 1, parameter WORD_WIDTH = WORD_SIZE * 8, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter REQS_BITS = `CLOG2(NUM_REQUESTS) + parameter REQS_BITS = `CLOG2(NUM_REQS) ) ( input wire clk, input wire reset, // input requests - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in, - input wire [NUM_REQUESTS-1:0] io_req_rw_in, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in, - output wire [NUM_REQUESTS-1:0] io_req_ready_in, - - // input response - output wire [NUM_REQUESTS-1:0] io_rsp_valid_in, - output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in, - input wire [NUM_REQUESTS-1:0] io_rsp_ready_in, + input wire [NUM_REQS-1:0][`NUM_THREADS-1:0] io_req_valid_in, + input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in, + input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in, + input wire [NUM_REQS-1:0] io_req_rw_in, + input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in, + input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in, + output wire [NUM_REQS-1:0] io_req_ready_in, // output request - output wire [`NUM_THREADS-1:0] io_req_valid_out, - output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out, - output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out, - output wire io_req_rw_out, - output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out, - output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out, - input wire io_req_ready_out, + output wire [`NUM_THREADS-1:0] io_req_valid_out, + output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out, + output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out, + output wire io_req_rw_out, + output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out, + output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out, + input wire io_req_ready_out, + + // input response + output wire [NUM_REQS-1:0] io_rsp_valid_in, + output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in, + output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] io_rsp_data_in, + input wire [NUM_REQS-1:0] io_rsp_ready_in, // output response - input wire io_rsp_valid_out, - input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out, - input wire [WORD_WIDTH-1:0] io_rsp_data_out, - output wire io_rsp_ready_out + input wire io_rsp_valid_out, + input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out, + input wire [WORD_WIDTH-1:0] io_rsp_data_out, + output wire io_rsp_ready_out ); - if (NUM_REQUESTS > 1) begin + wire [NUM_REQS-1:0] valids; + for (genvar i = 0; i < NUM_REQS; i++) begin + assign valids[i] = (| io_req_valid_in[i]); + end + + wire [NUM_REQS-1:0][(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH))-1:0] data_in; + for (genvar i = 0; i < NUM_REQS; i++) begin + assign data_in[i] = {{io_req_valid_in[i], io_req_tag_in[i], REQS_BITS'(i)}, io_req_addr_in[i], io_req_rw_in[i], io_req_byteen_in[i], io_req_data_in[i]}; + end - wire [NUM_REQUESTS-1:0] valids; - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign valids[i] = (| io_req_valid_in[i]); - end + wire [`NUM_THREADS-1:0] io_req_tmask_out; + wire io_req_valid_out_unqual; - wire [REQS_BITS-1:0] req_idx; - wire [NUM_REQUESTS-1:0] req_1hot; + VX_stream_arbiter #( + .NUM_REQS(NUM_REQS), + .DATAW(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)), + .BUFFERED(NUM_REQS >= 4) + ) req_arb ( + .clk (clk), + .reset (reset), + .valid_in (valids), + .valid_out (io_req_valid_out_unqual), + .data_in (data_in), + .data_out ({io_req_tmask_out, io_req_tag_out, io_req_addr_out, io_req_rw_out, io_req_byteen_out, io_req_data_out}), + .ready_in (io_req_ready_in), + .ready_out (io_req_ready_out) + ); - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) req_arb ( - .clk (clk), - .reset (reset), - .requests (valids), - `UNUSED_PIN (grant_valid), - .grant_index (req_idx), - .grant_onehot (req_1hot) - ); + assign io_req_valid_out = {`NUM_THREADS{io_req_valid_out_unqual}} & io_req_tmask_out; - wire stall = ~io_req_ready_out && (| io_req_valid_out); + /////////////////////////////////////////////////////////////////////// - VX_generic_register #( - .N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)), - .R(`NUM_THREADS), - .PASSTHRU(NUM_REQUESTS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({io_req_valid_in[req_idx], {io_req_tag_in[req_idx], REQS_BITS'(req_idx)}, io_req_addr_in[req_idx], io_req_rw_in[req_idx], io_req_byteen_in[req_idx], io_req_data_in[req_idx]}), - .out ({io_req_valid_out, io_req_tag_out, io_req_addr_out, io_req_rw_out, io_req_byteen_out, io_req_data_out}) - ); - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign io_req_ready_in[i] = req_1hot[i] && ~stall; - end - - /////////////////////////////////////////////////////////////////////// + if (NUM_REQS > 1) begin wire [REQS_BITS-1:0] rsp_sel = io_rsp_tag_out[REQS_BITS-1:0]; - for (genvar i = 0; i < NUM_REQUESTS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin assign io_rsp_valid_in[i] = io_rsp_valid_out && (rsp_sel == REQS_BITS'(i)); assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; assign io_rsp_data_in[i] = io_rsp_data_out; @@ -100,14 +92,6 @@ module VX_io_arb #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) - assign io_req_valid_out = io_req_valid_in; - assign io_req_tag_out = io_req_tag_in; - assign io_req_addr_out = io_req_addr_in; - assign io_req_rw_out = io_req_rw_in; - assign io_req_byteen_out = io_req_byteen_in; - assign io_req_data_out = io_req_data_in; - assign io_req_ready_in = io_req_ready_out; - assign io_rsp_valid_in = io_rsp_valid_out; assign io_rsp_tag_in = io_rsp_tag_out; assign io_rsp_data_in = io_rsp_data_out; diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index 713e37e1..e8e39b70 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -1,94 +1,81 @@ `include "VX_define.vh" module VX_mem_arb #( - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, parameter DATA_WIDTH = 1, parameter TAG_IN_WIDTH = 1, parameter TAG_OUT_WIDTH = 1, parameter DATA_SIZE = (DATA_WIDTH / 8), parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), - parameter REQS_BITS = `CLOG2(NUM_REQUESTS) + parameter REQS_BITS = `CLOG2(NUM_REQS) ) ( input wire clk, input wire reset, // input requests - input wire [NUM_REQUESTS-1:0] req_valid_in, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] req_tag_in, - input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] req_addr_in, - input wire [NUM_REQUESTS-1:0] req_rw_in, - input wire [NUM_REQUESTS-1:0][DATA_SIZE-1:0] req_byteen_in, - input wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] req_data_in, - output wire [NUM_REQUESTS-1:0] req_ready_in, - - // input response - output wire [NUM_REQUESTS-1:0] rsp_valid_out, - output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out, - output wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] rsp_data_out, - input wire [NUM_REQUESTS-1:0] rsp_ready_out, + input wire [NUM_REQS-1:0] req_valid_in, + input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] req_tag_in, + input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr_in, + input wire [NUM_REQS-1:0] req_rw_in, + input wire [NUM_REQS-1:0][DATA_SIZE-1:0] req_byteen_in, + input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] req_data_in, + output wire [NUM_REQS-1:0] req_ready_in, // output request - output wire req_valid_out, - output wire [TAG_OUT_WIDTH-1:0] req_tag_out, - output wire [ADDR_WIDTH-1:0] req_addr_out, - output wire req_rw_out, - output wire [DATA_SIZE-1:0] req_byteen_out, - output wire [DATA_WIDTH-1:0] req_data_out, - input wire req_ready_out, + output wire req_valid_out, + output wire [TAG_OUT_WIDTH-1:0] req_tag_out, + output wire [ADDR_WIDTH-1:0] req_addr_out, + output wire req_rw_out, + output wire [DATA_SIZE-1:0] req_byteen_out, + output wire [DATA_WIDTH-1:0] req_data_out, + input wire req_ready_out, + + // input response + output wire [NUM_REQS-1:0] rsp_valid_out, + output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out, + output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out, + input wire [NUM_REQS-1:0] rsp_ready_out, // output response - input wire rsp_valid_in, - input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in, - input wire [DATA_WIDTH-1:0] rsp_data_in, - output wire rsp_ready_in + input wire rsp_valid_in, + input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in, + input wire [DATA_WIDTH-1:0] rsp_data_in, + output wire rsp_ready_in ); - if (NUM_REQUESTS > 1) begin + wire [NUM_REQS-1:0][(TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH)-1:0] data_in; + for (genvar i = 0; i < NUM_REQS; i++) begin + assign data_in[i] = {{req_tag_in[i], REQS_BITS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; + end - wire [REQS_BITS-1:0] req_idx; - wire [NUM_REQUESTS-1:0] req_1hot; + VX_stream_arbiter #( + .NUM_REQS(NUM_REQS), + .DATAW(TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH), + .BUFFERED(NUM_REQS >= 4) + ) req_arb ( + .clk (clk), + .reset (reset), + .valid_in (req_valid_in), + .valid_out (req_valid_out), + .data_in (data_in), + .data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}), + .ready_in (req_ready_in), + .ready_out (req_ready_out) + ); - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) req_arb ( - .clk (clk), - .reset (reset), - .requests (req_valid_in), - `UNUSED_PIN (grant_valid), - .grant_index (req_idx), - .grant_onehot (req_1hot) - ); + /////////////////////////////////////////////////////////////////////// - wire stall = ~req_ready_out && req_valid_out; + if (NUM_REQS > 1) begin - VX_generic_register #( - .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH), - .R(1), - .PASSTHRU(NUM_REQUESTS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({req_valid_in[req_idx], {req_tag_in[req_idx], REQS_BITS'(req_idx)}, req_addr_in[req_idx], req_rw_in[req_idx], req_byteen_in[req_idx], req_data_in[req_idx]}), - .out ({req_valid_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}) - ); - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign req_ready_in[i] = req_1hot[i] && ~stall; - end - - /////////////////////////////////////////////////////////////////////// - - wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in[REQS_BITS-1:0]; + wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in [REQS_BITS-1:0]; - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i)); - assign rsp_tag_out[i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH]; - assign rsp_data_out[i] = rsp_data_in; + for (genvar i = 0; i < NUM_REQS; i++) begin + assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i)); + assign rsp_tag_out [i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH]; + assign rsp_data_out [i] = rsp_data_in; end - assign rsp_ready_in = rsp_ready_out[rsp_sel]; + assign rsp_ready_in = rsp_ready_out [rsp_sel]; end else begin @@ -103,10 +90,10 @@ module VX_mem_arb #( assign req_data_out = req_data_in; assign req_ready_in = req_ready_out; - assign rsp_valid_out = rsp_valid_in; - assign rsp_tag_out = rsp_tag_in; - assign rsp_data_out = rsp_data_in; - assign rsp_ready_in = rsp_ready_out; + assign rsp_valid_out = rsp_valid_in; + assign rsp_tag_out = rsp_tag_in; + assign rsp_data_out = rsp_data_in; + assign rsp_ready_in = rsp_ready_out; end diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 2ae31ba0..bd788793 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -27,14 +27,14 @@ module VX_mem_unit # ( VX_cache_dram_rsp_if icache_dram_rsp_if ); VX_cache_core_req_if #( - .NUM_REQUESTS (`DNUM_REQUESTS), + .NUM_REQS (`DNUM_REQUESTS), .WORD_SIZE (`DWORD_SIZE), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS) ) core_dcache_req_qual_if(), core_smem_req_if(); VX_cache_core_rsp_if #( - .NUM_REQUESTS (`DNUM_REQUESTS), + .NUM_REQS (`DNUM_REQUESTS), .WORD_SIZE (`DWORD_SIZE), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS) @@ -64,7 +64,7 @@ module VX_mem_unit # ( .BANK_LINE_SIZE (`SBANK_LINE_SIZE), .NUM_BANKS (`SNUM_BANKS), .WORD_SIZE (`SWORD_SIZE), - .NUM_REQUESTS (`SNUM_REQUESTS), + .NUM_REQS (`SNUM_REQUESTS), .CREQ_SIZE (`SCREQ_SIZE), .MSHR_SIZE (8), .DRFQ_SIZE (1), @@ -136,7 +136,7 @@ module VX_mem_unit # ( .BANK_LINE_SIZE (`DBANK_LINE_SIZE), .NUM_BANKS (`DNUM_BANKS), .WORD_SIZE (`DWORD_SIZE), - .NUM_REQUESTS (`DNUM_REQUESTS), + .NUM_REQS (`DNUM_REQUESTS), .CREQ_SIZE (`DCREQ_SIZE), .MSHR_SIZE (`DMSHR_SIZE), .DRFQ_SIZE (`DDRFQ_SIZE), @@ -209,7 +209,7 @@ module VX_mem_unit # ( .BANK_LINE_SIZE (`IBANK_LINE_SIZE), .NUM_BANKS (`INUM_BANKS), .WORD_SIZE (`IWORD_SIZE), - .NUM_REQUESTS (`INUM_REQUESTS), + .NUM_REQS (`INUM_REQUESTS), .CREQ_SIZE (`ICREQ_SIZE), .MSHR_SIZE (`IMSHR_SIZE), .DRFQ_SIZE (`IDRFQ_SIZE), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index f7c1e4b5..90659c4e 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -57,14 +57,14 @@ module VX_pipeline #( ); // Dcache VX_cache_core_req_if #( - .NUM_REQUESTS(`NUM_THREADS), + .NUM_REQS(`NUM_THREADS), .WORD_SIZE(4), .CORE_TAG_WIDTH(`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS) ) core_dcache_req_if(); VX_cache_core_rsp_if #( - .NUM_REQUESTS(`NUM_THREADS), + .NUM_REQS(`NUM_THREADS), .WORD_SIZE(4), .CORE_TAG_WIDTH(`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS) @@ -72,14 +72,14 @@ module VX_pipeline #( // Icache VX_cache_core_req_if #( - .NUM_REQUESTS(1), + .NUM_REQS(1), .WORD_SIZE(4), .CORE_TAG_WIDTH(`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS) ) core_icache_req_if(); VX_cache_core_rsp_if #( - .NUM_REQUESTS(1), + .NUM_REQS(1), .WORD_SIZE(4), .CORE_TAG_WIDTH(`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS) diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 0e7fb5e2..2effb12e 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -243,7 +243,7 @@ module Vortex ( end VX_io_arb #( - .NUM_REQUESTS (`NUM_CLUSTERS), + .NUM_REQS (`NUM_CLUSTERS), .WORD_SIZE (4), .TAG_IN_WIDTH (`L2CORE_TAG_WIDTH), .TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH) @@ -260,12 +260,6 @@ module Vortex ( .io_req_tag_in (per_cluster_io_req_tag), .io_req_ready_in (per_cluster_io_req_ready), - // input responses - .io_rsp_valid_in (per_cluster_io_rsp_valid), - .io_rsp_data_in (per_cluster_io_rsp_data), - .io_rsp_tag_in (per_cluster_io_rsp_tag), - .io_rsp_ready_in (per_cluster_io_rsp_ready), - // output request .io_req_valid_out (io_req_valid), .io_req_rw_out (io_req_rw), @@ -274,6 +268,12 @@ module Vortex ( .io_req_data_out (io_req_data), .io_req_tag_out (io_req_tag), .io_req_ready_out (io_req_ready), + + // input responses + .io_rsp_valid_in (per_cluster_io_rsp_valid), + .io_rsp_data_in (per_cluster_io_rsp_data), + .io_rsp_tag_in (per_cluster_io_rsp_tag), + .io_rsp_ready_in (per_cluster_io_rsp_ready), // output response .io_rsp_valid_out (io_rsp_valid), @@ -283,7 +283,9 @@ module Vortex ( ); VX_csr_io_arb #( - .NUM_REQUESTS (`NUM_CLUSTERS) + .NUM_REQS (`NUM_CLUSTERS), + .DATA_WIDTH (32), + .ADDR_WIDTH (12) ) csr_io_arb ( .clk (clk), .reset (reset), @@ -353,7 +355,7 @@ module Vortex ( VX_snp_forwarder #( .CACHE_ID (`L3CACHE_ID), - .NUM_REQUESTS (`NUM_CLUSTERS), + .NUM_REQS (`NUM_CLUSTERS), .SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH), .DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), .SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH), @@ -391,7 +393,7 @@ module Vortex ( .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), .NUM_BANKS (`L3NUM_BANKS), .WORD_SIZE (`L3WORD_SIZE), - .NUM_REQUESTS (`L3NUM_REQUESTS), + .NUM_REQS (`L3NUM_REQUESTS), .CREQ_SIZE (`L3CREQ_SIZE), .MSHR_SIZE (`L3MSHR_SIZE), .DRFQ_SIZE (`L3DRFQ_SIZE), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 9e23765f..6925101e 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -13,7 +13,7 @@ module VX_bank #( // Size of a word in bytes parameter WORD_SIZE = 1, // Number of Word requests per cycle - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, // Core Request Queue Size parameter CREQ_SIZE = 1, @@ -55,13 +55,13 @@ module VX_bank #( input wire reset, // Core Request - input wire [NUM_REQUESTS-1:0] core_req_valid, - input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, - input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, - output wire core_req_ready, + input wire [NUM_REQS-1:0] core_req_valid, + input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, + input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, + input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, + output wire core_req_ready, // Core Response output wire core_rsp_valid, @@ -152,7 +152,8 @@ module VX_bank #( VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), - .SIZE(SNRQ_SIZE) + .SIZE(SNRQ_SIZE), + .BUFFERED(1) ) snp_req_queue ( .clk (clk), .reset (reset), @@ -192,7 +193,8 @@ module VX_bank #( VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), - .SIZE(DRFQ_SIZE) + .SIZE(DRFQ_SIZE), + .BUFFERED(1) ) dfp_queue ( .clk (clk), .reset (reset), @@ -231,7 +233,7 @@ module VX_bank #( VX_bank_core_req_arb #( .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), + .NUM_REQS (NUM_REQS), .CREQ_SIZE (CREQ_SIZE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) @@ -704,7 +706,7 @@ end .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), + .NUM_REQS (NUM_REQS), .MSHR_SIZE (MSHR_SIZE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .SNP_TAG_WIDTH (SNP_TAG_WIDTH) @@ -960,7 +962,7 @@ end if (creq_rw_st0) $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, creq_writeword_st0, debug_wid_st0, debug_pc_st0); else - $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, creq_writeword_st0, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_inv_st0); diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index e6360ae6..d3816c45 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -2,27 +2,27 @@ module VX_bank_core_req_arb #( // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Number of Word requests per cycle - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, // Core Request Queue Size - parameter CREQ_SIZE = 1, + parameter CREQ_SIZE = 1, // core request tag size - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0 ) ( input wire clk, input wire reset, // Enqueue - input wire push, - input wire [NUM_REQUESTS-1:0] valids_in, - input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in, - input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen_in, - input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] writedata_in, + input wire push, + input wire [NUM_REQS-1:0] valids_in, + input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in, + input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in, + input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen_in, + input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] writedata_in, // Dequeue input wire pop, @@ -38,16 +38,16 @@ module VX_bank_core_req_arb #( output wire full ); - wire [NUM_REQUESTS-1:0] q_valids; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag; - wire [`CORE_REQ_TAG_COUNT-1:0] q_rw; - wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] q_byteen; - wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] q_writedata; - wire q_push; - wire q_pop; - wire q_empty; - wire q_full; + wire [NUM_REQS-1:0] q_valids; + wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag; + wire [`CORE_REQ_TAG_COUNT-1:0] q_rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] q_byteen; + wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] q_writedata; + wire q_push; + wire q_pop; + wire q_empty; + wire q_full; always @(*) begin assert(!push || (| valids_in)); @@ -70,7 +70,7 @@ module VX_bank_core_req_arb #( `UNUSED_PIN (size) ); - if (NUM_REQUESTS > 1) begin + if (NUM_REQS > 1) begin reg [`REQS_BITS-1:0] sel_idx, sel_idx_r; reg [CORE_TAG_WIDTH-1:0] sel_tag, sel_tag_r; @@ -79,16 +79,16 @@ module VX_bank_core_req_arb #( reg [WORD_SIZE-1:0] sel_byteen, sel_byteen_r; reg [`WORD_WIDTH-1:0] sel_writedata, sel_writedata_r; - reg [$clog2(NUM_REQUESTS+1)-1:0] q_valids_cnt_r; - wire [$clog2(NUM_REQUESTS+1)-1:0] q_valids_cnt; + reg [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_r; + wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt; - reg [NUM_REQUESTS-1:0] pop_mask; + reg [NUM_REQS-1:0] pop_mask; reg fast_track; assign q_push = push; assign q_pop = pop && (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) && !fast_track; - wire [NUM_REQUESTS-1:0] requests = q_valids & ~pop_mask; + wire [NUM_REQS-1:0] requests = q_valids & ~pop_mask; always @(*) begin sel_idx = 0; @@ -98,9 +98,9 @@ module VX_bank_core_req_arb #( sel_byteen = 'x; sel_writedata = 'x; - for (integer i = 0; i < NUM_REQUESTS; i++) begin + for (integer i = 0; i < NUM_REQS; i++) begin if (requests[i]) begin - sel_idx = `REQS_BITS'(i); + sel_idx = `REQS_BITS'(i); sel_addr = q_addr[i]; if (0 == CORE_TAG_ID_BITS) begin sel_tag = q_tag[i]; @@ -114,7 +114,7 @@ module VX_bank_core_req_arb #( end VX_countones #( - .N(NUM_REQUESTS) + .N(NUM_REQS) ) counter ( .valids (q_valids), .count (q_valids_cnt) @@ -129,7 +129,7 @@ module VX_bank_core_req_arb #( if (!q_empty && ((0 == q_valids_cnt_r) || (pop && fast_track))) begin q_valids_cnt_r <= q_valids_cnt; - pop_mask <= (NUM_REQUESTS'(1) << sel_idx); + pop_mask <= (NUM_REQS'(1) << sel_idx); fast_track <= 0; end else if (pop) begin q_valids_cnt_r <= q_valids_cnt_r - 1; diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 40919cd8..b29416f0 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -12,7 +12,7 @@ module VX_cache #( // Size of a word in bytes parameter WORD_SIZE = 4, // Number of Word requests per cycle - parameter NUM_REQUESTS = 4, + parameter NUM_REQS = 4, // Core Request Queue Size parameter CREQ_SIZE = 4, @@ -57,19 +57,19 @@ module VX_cache #( input wire reset, // Core request - input wire [NUM_REQUESTS-1:0] core_req_valid, - input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, + input wire [NUM_REQS-1:0] core_req_valid, + input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, + input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, - output wire core_req_ready, + output wire core_req_ready, // Core response - output wire [NUM_REQUESTS-1:0] core_rsp_valid, - output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, + output wire [NUM_REQS-1:0] core_rsp_valid, + output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, - input wire core_rsp_ready, + input wire core_rsp_ready, // DRAM request output wire dram_req_valid, @@ -101,9 +101,9 @@ module VX_cache #( output wire [NUM_BANKS-1:0] miss_vec ); - `STATIC_ASSERT(NUM_BANKS <= NUM_REQUESTS, ("invalid value")) + `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) - wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; + wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid; wire [NUM_BANKS-1:0] per_bank_core_req_ready; @@ -141,7 +141,7 @@ module VX_cache #( .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS) + .NUM_REQS (NUM_REQS) ) cache_core_req_bank_sel ( .core_req_valid (core_req_valid), .core_req_addr (core_req_addr), @@ -158,13 +158,13 @@ module VX_cache #( end for (genvar i = 0; i < NUM_BANKS; i++) begin - wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid; - wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw; - wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen; - wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; - wire curr_bank_core_req_ready; + wire [NUM_REQS-1:0] curr_bank_core_req_valid; + wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen; + wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr; + wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; + wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; @@ -197,7 +197,7 @@ module VX_cache #( wire curr_bank_miss; // Core Req - assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}}); + assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQS{core_req_ready}}); assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_rw = core_req_rw; assign curr_bank_core_req_byteen = core_req_byteen; @@ -262,7 +262,7 @@ module VX_cache #( .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), + .NUM_REQS (NUM_REQS), .CREQ_SIZE (CREQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .DRFQ_SIZE (DRFQ_SIZE), @@ -331,7 +331,7 @@ module VX_cache #( VX_cache_core_rsp_merge #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), + .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) cache_core_rsp_merge ( @@ -349,26 +349,25 @@ module VX_cache #( ); if (DRAM_ENABLE) begin - VX_cache_dram_req_arb #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE) - ) cache_dram_req_arb ( - .clk (clk), - .reset (reset), - .per_bank_dram_req_valid (per_bank_dram_req_valid), - .per_bank_dram_req_rw (per_bank_dram_req_rw), - .per_bank_dram_req_byteen (per_bank_dram_req_byteen), - .per_bank_dram_req_addr (per_bank_dram_req_addr), - .per_bank_dram_req_data (per_bank_dram_req_data), - .per_bank_dram_req_ready (per_bank_dram_req_ready), - .dram_req_valid (dram_req_valid), - .dram_req_rw (dram_req_rw), - .dram_req_byteen (dram_req_byteen), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_ready (dram_req_ready) - ); + wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH)-1:0] data_in; + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]}; + end + + VX_stream_arbiter #( + .NUM_REQS(NUM_BANKS), + .DATAW(`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .BUFFERED(NUM_BANKS >= 4) + ) dram_req_arb ( + .clk (clk), + .reset (reset), + .valid_in (per_bank_dram_req_valid), + .valid_out (dram_req_valid), + .data_in (data_in), + .data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}), + .ready_in (per_bank_dram_req_ready), + .ready_out (dram_req_ready) + ); end else begin `UNUSED_VAR (per_bank_dram_req_valid) `UNUSED_VAR (per_bank_dram_req_rw) @@ -385,19 +384,19 @@ module VX_cache #( end if (FLUSH_ENABLE) begin - VX_snp_rsp_arb #( - .NUM_BANKS (NUM_BANKS), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .SNP_TAG_WIDTH (SNP_TAG_WIDTH) - ) snp_rsp_arb ( - .clk (clk), - .reset (reset), - .per_bank_snp_rsp_valid (per_bank_snp_rsp_valid), - .per_bank_snp_rsp_tag (per_bank_snp_rsp_tag), - .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), - .snp_rsp_valid (snp_rsp_valid), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready) + VX_stream_arbiter #( + .NUM_REQS(NUM_BANKS), + .DATAW(SNP_TAG_WIDTH), + .BUFFERED(NUM_BANKS >= 4) + ) snp_rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (per_bank_snp_rsp_valid), + .valid_out (snp_rsp_valid), + .data_in (per_bank_snp_rsp_tag), + .data_out (snp_rsp_tag), + .ready_in (per_bank_snp_rsp_ready), + .ready_out (snp_rsp_ready) ); end else begin `UNUSED_VAR (per_bank_snp_rsp_valid) diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 43edb1a7..f706c6bd 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -9,7 +9,7 @@ `define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_TAG_WIDTH) -`define REQS_BITS `LOG2UP(NUM_REQUESTS) +`define REQS_BITS `LOG2UP(NUM_REQS) // tag rw byteen tid `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) @@ -70,7 +70,7 @@ /////////////////////////////////////////////////////////////////////////////// -`define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQUESTS) +`define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS) `define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS-1:0] diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 7d28e8cb..66714cc8 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -2,23 +2,23 @@ module VX_cache_core_req_bank_sel #( // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter BANK_LINE_SIZE = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Number of Word requests per cycle - parameter NUM_REQUESTS = 1 + parameter NUM_REQS = 1 ) ( - input wire [NUM_REQUESTS-1:0] core_req_valid, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, - output wire core_req_ready, + input wire [NUM_REQS-1:0] core_req_valid, + input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + output wire core_req_ready, - output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid, - input wire [NUM_BANKS-1:0] per_bank_ready + output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid, + input wire [NUM_BANKS-1:0] per_bank_ready ); if (NUM_BANKS > 1) begin - reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r; + reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r; reg [NUM_BANKS-1:0] per_bank_ready_ignore; reg [NUM_BANKS-1:0] per_bank_ready_other; @@ -34,14 +34,14 @@ module VX_cache_core_req_bank_sel #( end end - for (integer i = 0; i < NUM_REQUESTS; i++) begin + for (integer i = 0; i < NUM_REQS; i++) begin per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0; end end for (genvar i = 0; i < NUM_BANKS; i++) begin - for (genvar j = 0; j < NUM_REQUESTS; j++) begin + for (genvar j = 0; j < NUM_REQS; j++) begin assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i]; end end diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index d8e3070e..f7e54431 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -2,69 +2,68 @@ module VX_cache_core_rsp_merge #( // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Number of Word requests per cycle - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, // core request tag size - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Per Bank WB - input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, - input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, - input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, - input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, - output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, + input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, + input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, + input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, + input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, + output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Writeback - output wire [NUM_REQUESTS-1:0] core_rsp_valid, + output wire [NUM_REQS-1:0] core_rsp_valid, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, - output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, - input wire core_rsp_ready + output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, + input wire core_rsp_ready ); - if (NUM_REQUESTS > 1) begin + if (NUM_BANKS > 1) begin - reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual; - reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; + reg [NUM_REQS-1:0] core_rsp_valid_unqual; reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; + reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_BANKS-1:0] core_rsp_bank_select; + reg [CORE_TAG_ID_BITS-1:0] sel_tag_id; if (CORE_TAG_ID_BITS != 0) begin - wire [`BANK_BITS-1:0] sel_idx; - - VX_rr_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_core_rsp_valid), - `UNUSED_PIN (grant_valid), - .grant_index (sel_idx), - `UNUSED_PIN (grant_onehot) - ); always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx]; + core_rsp_tag_unqual = 'x; + sel_tag_id = 'x; core_rsp_data_unqual = 'x; - core_rsp_bank_select = 0; + core_rsp_bank_select = 0; + for (integer i = 0; i < NUM_BANKS; i++) begin + if (per_bank_core_rsp_valid[i]) begin + core_rsp_tag_unqual = per_bank_core_rsp_tag[i]; + sel_tag_id = per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0]; + break; + end + end + for (integer i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] - && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[sel_idx][CORE_TAG_ID_BITS-1:0])) begin + && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == sel_tag_id)) begin core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_bank_select[i] = 1; end - end + end end end else begin + always @(*) begin core_rsp_valid_unqual = 0; core_rsp_tag_unqual = 'x; @@ -86,8 +85,8 @@ module VX_cache_core_rsp_merge #( wire stall = ~core_rsp_ready && (| core_rsp_valid); VX_generic_register #( - .N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), - .R(NUM_REQUESTS), + .N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), + .R(NUM_REQS), .PASSTHRU(NUM_BANKS <= 2) ) pipe_reg ( .clk (clk), @@ -98,16 +97,33 @@ module VX_cache_core_rsp_merge #( .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) ); - assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}}; + for (genvar i = 0; i < NUM_BANKS; i++) begin + assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i] && ~stall; + end end else begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (per_bank_core_rsp_tid) - assign core_rsp_valid = per_bank_core_rsp_valid; - assign core_rsp_tag = per_bank_core_rsp_tag; - assign core_rsp_data = per_bank_core_rsp_data; - assign per_bank_core_rsp_ready = core_rsp_ready; + if (NUM_REQS > 1) begin + + assign core_rsp_valid[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid; + if (CORE_TAG_ID_BITS != 0) begin + assign core_rsp_tag = per_bank_core_rsp_tag[0]; + end else begin + assign core_rsp_tag[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_tag[0]; + end + assign core_rsp_data[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0]; + assign per_bank_core_rsp_ready[0] = core_rsp_ready; + + end else begin + + `UNUSED_VAR(per_bank_core_rsp_tid) + assign core_rsp_valid = per_bank_core_rsp_valid; + assign core_rsp_tag = per_bank_core_rsp_tag[0]; + assign core_rsp_data = per_bank_core_rsp_data[0]; + assign per_bank_core_rsp_ready[0] = core_rsp_ready; + + end end endmodule diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v deleted file mode 100644 index b4a8c015..00000000 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ /dev/null @@ -1,77 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_cache_dram_req_arb #( - // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, - // Number of banks - parameter NUM_BANKS = 1, - // Size of a word in bytes - parameter WORD_SIZE = 1 -) ( - input wire clk, - input wire reset, - - // Inputs - input wire [NUM_BANKS-1:0] per_bank_dram_req_valid, - input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr, - input wire [NUM_BANKS-1:0] per_bank_dram_req_rw, - input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen, - input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data, - output wire [NUM_BANKS-1:0] per_bank_dram_req_ready, - - // Outputs - output wire dram_req_valid, - output wire dram_req_rw, - output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, - output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, - output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, - input wire dram_req_ready -); - - if (NUM_BANKS > 1) begin - wire sel_valid; - wire [`BANK_BITS-1:0] sel_idx; - wire [NUM_BANKS-1:0] sel_1hot; - - VX_rr_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_dram_req_valid), - .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot(sel_1hot) - ); - - wire stall = ~dram_req_ready && dram_req_valid; - - VX_generic_register #( - .N(1 + `DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), - .R(1), - .PASSTHRU(NUM_BANKS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, per_bank_dram_req_addr[sel_idx], per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_data[sel_idx]}), - .out ({dram_req_valid, dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}) - ); - - for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall; - end - end else begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - - assign dram_req_valid = per_bank_dram_req_valid; - assign dram_req_rw = per_bank_dram_req_rw; - assign dram_req_byteen = per_bank_dram_req_byteen; - assign dram_req_addr = per_bank_dram_req_addr; - assign dram_req_data = per_bank_dram_req_data; - assign per_bank_dram_req_ready = dram_req_ready; - end - -endmodule diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 7ba5aa03..ee3bc71a 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -11,7 +11,7 @@ module VX_miss_resrv #( // Size of a word in bytes parameter WORD_SIZE = 1, // Number of Word requests per cycle - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, // Miss Reserv Queue Knob parameter MSHR_SIZE = 1, // core request tag size @@ -73,7 +73,7 @@ module VX_miss_resrv #( ); wire [`MSHR_METADATA_WIDTH-1:0] metadata_table; - `NO_RW_RAM_CHECK reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; + reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table; diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index a7b0aa6c..0066fdbd 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -4,51 +4,52 @@ module VX_snp_forwarder #( parameter CACHE_ID = 0, parameter SRC_ADDR_WIDTH = 1, parameter DST_ADDR_WIDTH = 1, - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, parameter SNP_TAG_WIDTH = 1, - parameter SNRQ_SIZE = 1 + parameter SNRQ_SIZE = 1, + parameter LOG_SNRQ_SIZE = `LOG2UP(SNRQ_SIZE) ) ( input wire clk, input wire reset, // Snoop request - input wire snp_req_valid, - input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, + input wire snp_req_valid, + input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_inv, + input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, + output wire snp_req_ready, // Snoop response - output wire snp_rsp_valid, - output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr, - output wire snp_rsp_inv, - output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, + output wire snp_rsp_valid, + output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr, + output wire snp_rsp_inv, + output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, // Snoop Forwarding out - output wire [NUM_REQUESTS-1:0] snp_fwdout_valid, - output wire [NUM_REQUESTS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr, - output wire [NUM_REQUESTS-1:0] snp_fwdout_inv, - output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag, - input wire [NUM_REQUESTS-1:0] snp_fwdout_ready, + output wire [NUM_REQS-1:0] snp_fwdout_valid, + output wire [NUM_REQS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr, + output wire [NUM_REQS-1:0] snp_fwdout_inv, + output wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdout_tag, + input wire [NUM_REQS-1:0] snp_fwdout_ready, // Snoop forwarding in - input wire [NUM_REQUESTS-1:0] snp_fwdin_valid, - input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag, - output wire [NUM_REQUESTS-1:0] snp_fwdin_ready + input wire [NUM_REQS-1:0] snp_fwdin_valid, + input wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag, + output wire [NUM_REQS-1:0] snp_fwdin_ready ); localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH; - localparam NUM_REQUESTS_QUAL = NUM_REQUESTS * (1 << ADDR_DIFF); + localparam NUM_REQUESTS_QUAL = NUM_REQS * (1 << ADDR_DIFF); localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL); - `STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value")) + `STATIC_ASSERT(NUM_REQS > 1, ("invalid value")) reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; - wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr; + wire [LOG_SNRQ_SIZE-1:0] sfq_write_addr, sfq_read_addr; wire sfq_full; - wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag; + wire [LOG_SNRQ_SIZE-1:0] fwdin_tag; wire fwdin_valid; wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]); @@ -78,14 +79,14 @@ module VX_snp_forwarder #( ); wire fwdout_valid; - wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag; + wire [LOG_SNRQ_SIZE-1:0] fwdout_tag; wire [DST_ADDR_WIDTH-1:0] fwdout_addr; wire fwdout_inv; wire fwdout_ready; wire dispatch_hold; if (ADDR_DIFF != 0) begin - reg [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag_r; + reg [LOG_SNRQ_SIZE-1:0] fwdout_tag_r; reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r; reg fwdout_inv_r; reg dispatch_hold_r; @@ -136,9 +137,9 @@ module VX_snp_forwarder #( end end - reg [NUM_REQUESTS-1:0] snp_fwdout_ready_other; + reg [NUM_REQS-1:0] snp_fwdout_ready_other; - for (genvar i = 0; i < NUM_REQUESTS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; assign snp_fwdout_addr[i] = fwdout_addr; assign snp_fwdout_inv[i] = fwdout_inv; @@ -146,9 +147,9 @@ module VX_snp_forwarder #( end always @(*) begin - snp_fwdout_ready_other = {NUM_REQUESTS{1'b1}}; - for (integer i = 0; i < NUM_REQUESTS; i++) begin - for (integer j = 0; j < NUM_REQUESTS; j++) begin + snp_fwdout_ready_other = {NUM_REQS{1'b1}}; + for (integer i = 0; i < NUM_REQS; i++) begin + for (integer j = 0; j < NUM_REQS; j++) begin if (i != j) snp_fwdout_ready_other[i] &= snp_fwdout_ready[j]; end @@ -159,45 +160,20 @@ module VX_snp_forwarder #( assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold; - if (NUM_REQUESTS > 1) begin - wire sel_valid; - wire [`REQS_BITS-1:0] sel_idx; - wire [NUM_REQUESTS-1:0] sel_1hot; - - VX_rr_arbiter #( - .N(NUM_REQUESTS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (snp_fwdin_valid), - .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot (sel_1hot) - ); - - wire stall = ~fwdin_ready && fwdin_valid; - - VX_generic_register #( - .N(1 + `LOG2UP(SNRQ_SIZE)), - .R(1), - .PASSTHRU(NUM_REQUESTS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, snp_fwdin_tag[sel_idx]}), - .out ({fwdin_valid, fwdin_tag}) - ); - - for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign snp_fwdin_ready[i] = sel_1hot[i] && !stall; - end - end else begin - assign fwdin_valid = snp_fwdin_valid; - assign fwdin_tag = snp_fwdin_tag; - assign snp_fwdin_ready = fwdin_ready; - end + VX_stream_arbiter #( + .NUM_REQS(NUM_REQS), + .DATAW(LOG_SNRQ_SIZE), + .BUFFERED(NUM_REQS >= 4) + ) snp_fwdin_arb ( + .clk (clk), + .reset (reset), + .valid_in (snp_fwdin_valid), + .valid_out (fwdin_valid), + .data_in (snp_fwdin_tag), + .data_out (fwdin_tag), + .ready_in (snp_fwdin_ready), + .ready_out (fwdin_ready) + ); `ifdef DBG_PRINT_CACHE_SNP always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v deleted file mode 100644 index fa3f80df..00000000 --- a/hw/rtl/cache/VX_snp_rsp_arb.v +++ /dev/null @@ -1,59 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_snp_rsp_arb #( - parameter NUM_BANKS = 1, - parameter BANK_LINE_SIZE = 1, - parameter SNP_TAG_WIDTH = 1 -) ( - input wire clk, - input wire reset, - - input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid, - input wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag, - output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready, - - output wire snp_rsp_valid, - output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready -); - if (NUM_BANKS > 1) begin - wire sel_valid; - wire [`BANK_BITS-1:0] sel_idx; - wire [NUM_BANKS-1:0] sel_1hot; - - VX_rr_arbiter #( - .N(NUM_BANKS) - ) sel_arb ( - .clk (clk), - .reset (reset), - .requests (per_bank_snp_rsp_valid), - .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot(sel_1hot) - ); - - wire stall = ~snp_rsp_ready && snp_rsp_valid; - - VX_generic_register #( - .N(1 + SNP_TAG_WIDTH), - .R(1), - .PASSTHRU(NUM_BANKS <= 2) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}), - .out ({snp_rsp_valid, snp_rsp_tag}) - ); - - for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall; - end - end else begin - assign snp_rsp_valid = per_bank_snp_rsp_valid; - assign snp_rsp_tag = per_bank_snp_rsp_tag; - assign per_bank_snp_rsp_ready = snp_rsp_ready; - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/fp_cores/svdpi/float_dpi.cpp b/hw/rtl/fp_cores/svdpi/float_dpi.cpp index 0a8f8863..f73d336a 100644 --- a/hw/rtl/fp_cores/svdpi/float_dpi.cpp +++ b/hw/rtl/fp_cores/svdpi/float_dpi.cpp @@ -21,6 +21,7 @@ extern "C" { void dpi_ftou(int inst, bool enable, int a, int* result); void dpi_itof(int inst, bool enable, int a, int* result); void dpi_utof(int inst, bool enable, int a, int* result); + void dpi_delayed_assert(int inst, bool cond); } class ShiftRegister { @@ -238,4 +239,17 @@ void dpi_utof(int inst, bool enable, int a, int* result) { sr.ensure_init(LATENCY_ITOF); sr.push(fr.i, enable); *result = sr.top(); +} + +void dpi_delayed_assert(int inst, bool cond) { + ShiftRegister& sr = instances.get(inst); + + sr.ensure_init(2); + sr.push(!cond, 1); + + auto status = sr.top(); + if (status) { + printf("delayed assertion at %s!\n", svGetNameFromScope(svGetScope())); + std::abort(); + } } \ No newline at end of file diff --git a/hw/rtl/fp_cores/svdpi/float_dpi.vh b/hw/rtl/fp_cores/svdpi/float_dpi.vh index 358cf6c2..0131b422 100644 --- a/hw/rtl/fp_cores/svdpi/float_dpi.vh +++ b/hw/rtl/fp_cores/svdpi/float_dpi.vh @@ -15,4 +15,6 @@ import "DPI-C" context function void dpi_ftou(int inst, input logic enable, inpu import "DPI-C" context function void dpi_itof(int inst, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_utof(int inst, input logic enable, input int a, output int result); +import "DPI-C" context function void dpi_delayed_assert(int inst, input logic cond); + `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_cache_core_req_if.v b/hw/rtl/interfaces/VX_cache_core_req_if.v index 5db5ee61..cb9df392 100644 --- a/hw/rtl/interfaces/VX_cache_core_req_if.v +++ b/hw/rtl/interfaces/VX_cache_core_req_if.v @@ -4,19 +4,19 @@ `include "../cache/VX_cache_config.vh" interface VX_cache_core_req_if #( - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, parameter WORD_SIZE = 1, parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_ID_BITS = 0 ) (); - wire [NUM_REQUESTS-1:0] valid; - wire [`CORE_REQ_TAG_COUNT-1:0] rw; - wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen; - wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag; - wire ready; + wire [NUM_REQS-1:0] valid; + wire [`CORE_REQ_TAG_COUNT-1:0] rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen; + wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data; + wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag; + wire ready; endinterface diff --git a/hw/rtl/interfaces/VX_cache_core_rsp_if.v b/hw/rtl/interfaces/VX_cache_core_rsp_if.v index abf82149..22150e90 100644 --- a/hw/rtl/interfaces/VX_cache_core_rsp_if.v +++ b/hw/rtl/interfaces/VX_cache_core_rsp_if.v @@ -4,18 +4,18 @@ `include "../cache/VX_cache_config.vh" interface VX_cache_core_rsp_if #( - parameter NUM_REQUESTS = 1, + parameter NUM_REQS = 1, parameter WORD_SIZE = 1, parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_ID_BITS = 0 ) (); - wire [NUM_REQUESTS-1:0] valid; + wire [NUM_REQS-1:0] valid; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] data; + wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag; - wire ready; + wire ready; endinterface diff --git a/hw/rtl/libs/VX_fair_arbiter.v b/hw/rtl/libs/VX_fair_arbiter.v index 1656e830..14610695 100644 --- a/hw/rtl/libs/VX_fair_arbiter.v +++ b/hw/rtl/libs/VX_fair_arbiter.v @@ -1,17 +1,20 @@ `include "VX_platform.vh" module VX_fair_arbiter #( - parameter N = 1 + parameter NUM_REQS = 1, + parameter LOCK_ENABLE = 0, + parameter LOG_NUM_REQS = $clog2(NUM_REQS) ) ( - input wire clk, - input wire reset, - input wire [N-1:0] requests, - output wire [`LOG2UP(N)-1:0] grant_index, - output wire [N-1:0] grant_onehot, - output wire grant_valid + input wire clk, + input wire reset, + input wire [NUM_REQS-1:0] requests, + input wire enable, + output wire [LOG_NUM_REQS-1:0] grant_index, + output wire [NUM_REQS-1:0] grant_onehot, + output wire grant_valid ); - if (N == 1) begin + if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -21,49 +24,40 @@ module VX_fair_arbiter #( end else begin - reg [N-1:0] requests_use; - wire [N-1:0] update_value; - wire [N-1:0] late_value; - - wire refill; - wire [N-1:0] refill_value; - reg [N-1:0] refill_original; + reg [NUM_REQS-1:0] remaining; + wire [NUM_REQS-1:0] remaining_next; + wire [NUM_REQS-1:0] requests_use; + reg use_buffer; always @(posedge clk) begin if (reset) begin - requests_use <= 0; - refill_original <= 0; - end else begin - if (refill) begin - requests_use <= refill_value; - refill_original <= refill_value; - end else begin - requests_use <= update_value; - end + remaining <= 0; + use_buffer <= 0; + end else if (!LOCK_ENABLE || enable) begin + remaining <= remaining_next; + use_buffer <= (remaining_next != 0); end - end + end - assign refill = (requests_use == 0); - assign refill_value = requests; + assign requests_use = use_buffer ? remaining : requests; - reg [N-1:0] grant_onehot_r; - VX_priority_encoder #( - .N(N) + .N(NUM_REQS) ) priority_encoder ( .data_in (requests_use), .data_out (grant_index), .valid_out (grant_valid) ); + reg [NUM_REQS-1:0] grant_onehot_r; always @(*) begin - grant_onehot_r = N'(0); + grant_onehot_r = NUM_REQS'(0); grant_onehot_r[grant_index] = 1; end - assign grant_onehot = grant_onehot_r; - assign late_value = ((refill_original ^ requests) & ~refill_original); - assign update_value = (requests_use & ~grant_onehot_r) | late_value; + assign remaining_next = requests_use & ~grant_onehot_r; + + assign grant_onehot = grant_onehot_r; end endmodule diff --git a/hw/rtl/libs/VX_fixed_arbiter.v b/hw/rtl/libs/VX_fixed_arbiter.v index 3bcab5bf..7bc81751 100644 --- a/hw/rtl/libs/VX_fixed_arbiter.v +++ b/hw/rtl/libs/VX_fixed_arbiter.v @@ -1,20 +1,24 @@ `include "VX_platform.vh" module VX_fixed_arbiter #( - parameter N = 1 + parameter NUM_REQS = 1, + parameter LOCK_ENABLE = 0, + parameter LOG_NUM_REQS = $clog2(NUM_REQS) ) ( - input wire clk, - input wire reset, - input wire [N-1:0] requests, - output wire [`LOG2UP(N)-1:0] grant_index, - output wire [N-1:0] grant_onehot, - output wire grant_valid + input wire clk, + input wire reset, + input wire [NUM_REQS-1:0] requests, + input wire enable, + output wire [LOG_NUM_REQS-1:0] grant_index, + output wire [NUM_REQS-1:0] grant_onehot, + output wire grant_valid ); `UNUSED_VAR (clk) `UNUSED_VAR (reset) + `UNUSED_VAR (enable) - if (N == 1) begin + if (NUM_REQS == 1) begin assign grant_index = 0; assign grant_onehot = requests; @@ -22,22 +26,21 @@ module VX_fixed_arbiter #( end else begin - reg [N-1:0] grant_onehot_r; - VX_priority_encoder # ( - .N(N) + .N(NUM_REQS) ) priority_encoder ( .data_in (requests), .data_out (grant_index), .valid_out (grant_valid) ); + reg [NUM_REQS-1:0] grant_onehot_r; always @(*) begin - grant_onehot_r = N'(0); + grant_onehot_r = NUM_REQS'(0); grant_onehot_r[grant_index] = 1; end - assign grant_onehot = grant_onehot_r; - + + assign grant_onehot = grant_onehot_r; end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index e56a54f1..c81c0892 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -154,7 +154,7 @@ module VX_generic_queue #( .DATAW(DATAW), .SIZE(SIZE), .BUFFERED(0), - .RWCHECK(1), + .RWCHECK(0), .FASTRAM(FASTRAM) ) dp_ram ( .clk(clk), diff --git a/hw/rtl/libs/VX_matrix_arbiter.v b/hw/rtl/libs/VX_matrix_arbiter.v index d5eb2f87..232fc0b6 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.v +++ b/hw/rtl/libs/VX_matrix_arbiter.v @@ -1,17 +1,20 @@ `include "VX_platform.vh" module VX_matrix_arbiter #( - parameter N = 1 + parameter NUM_REQS = 1, + parameter LOCK_ENABLE = 0, + parameter LOG_NUM_REQS = $clog2(NUM_REQS) ) ( - input wire clk, - input wire reset, - input wire [N-1:0] requests, - output wire [`LOG2UP(N)-1:0] grant_index, - output wire [N-1:0] grant_onehot, - output wire grant_valid + input wire clk, + input wire reset, + input wire [NUM_REQS-1:0] requests, + input wire enable, + output wire [LOG_NUM_REQS-1:0] grant_index, + output wire [NUM_REQS-1:0] grant_onehot, + output wire grant_valid ); - if (N == 1) begin + if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -22,11 +25,12 @@ module VX_matrix_arbiter #( end else begin - reg [N-1:1] state [N-1:0]; - wire [N-1:0] pri [N-1:0]; + reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; + wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_unqual; - for (genvar i = 0; i < N; i++) begin - for (genvar j = 0; j < N; j++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar j = 0; j < NUM_REQS; j++) begin if (j > i) begin assign pri[j][i] = requests[i] && state[i][j]; end @@ -37,28 +41,42 @@ module VX_matrix_arbiter #( assign pri[j][i] = 0; end end - - assign grant_onehot[i] = requests[i] && !(| pri[i]); + assign grant_unqual[i] = requests[i] && !(| pri[i]); end - for (genvar i = 0; i < N; i++) begin - for (genvar j = i + 1; j < N; j++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar j = i + 1; j < NUM_REQS; j++) begin always @(posedge clk) begin if (reset) begin state[i][j] <= 0; end else begin - state[i][j] <= (state[i][j] || grant_onehot[j]) && !grant_onehot[i]; + state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; end end end end + if (LOCK_ENABLE == 0) begin + `UNUSED_VAR (enable) + assign grant_onehot = grant_unqual; + end else begin + reg [NUM_REQS-1:0] grant_unqual_prev; + always @(posedge clk) begin + if (reset) begin + grant_unqual_prev <= 0; + end else if (enable) begin + grant_unqual_prev <= grant_unqual; + end + end + assign grant_onehot = enable ? grant_unqual : grant_unqual_prev; + end + VX_onehot_encoder #( - .N(N) + .NUM_REQS(NUM_REQS) ) encoder ( - .onehot (grant_onehot), + .onehot (grant_unqual), `UNUSED_PIN (valid), - .value (grant_index) + .binary (grant_index) ); assign grant_valid = (| requests); diff --git a/hw/rtl/libs/VX_onehot_encooder.v b/hw/rtl/libs/VX_onehot_encooder.v index 6b5881bc..e807a0a7 100644 --- a/hw/rtl/libs/VX_onehot_encooder.v +++ b/hw/rtl/libs/VX_onehot_encooder.v @@ -11,7 +11,7 @@ module VX_onehot_encoder #( reg valid_r; always @(*) begin - binary_r = `LOG2UP(N)'(0); + binary_r = 'x; valid_r = 1'b0; for (integer i = 0; i < N; i++) begin if (onehot[i]) begin diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index f83aeb5b..3df59169 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -1,11 +1,12 @@ `include "VX_platform.vh" module VX_priority_encoder #( - parameter N = 1 + parameter N = 1, + parameter LOGN = `LOG2UP(N) ) ( - input wire [N-1:0] data_in, - output wire [`LOG2UP(N)-1:0] data_out, - output wire valid_out + input wire [N-1:0] data_in, + output wire [LOGN-1:0] data_out, + output wire valid_out ); reg [`LOG2UP(N)-1:0] data_out_r; @@ -13,7 +14,7 @@ module VX_priority_encoder #( data_out_r = 0; for (integer i = 0; i < N; i++) begin if (data_in[i]) begin - data_out_r = `LOG2UP(N)'(i); + data_out_r = LOGN'(i); break; end end diff --git a/hw/rtl/libs/VX_rr_arbiter.v b/hw/rtl/libs/VX_rr_arbiter.v index 86d30f3a..aa347a4c 100644 --- a/hw/rtl/libs/VX_rr_arbiter.v +++ b/hw/rtl/libs/VX_rr_arbiter.v @@ -1,17 +1,20 @@ `include "VX_platform.vh" module VX_rr_arbiter #( - parameter N = 1 + parameter NUM_REQS = 1, + parameter LOCK_ENABLE = 0, + parameter LOG_NUM_REQS = $clog2(NUM_REQS) ) ( - input wire clk, - input wire reset, - input wire [N-1:0] requests, - output wire [`LOG2UP(N)-1:0] grant_index, - output wire [N-1:0] grant_onehot, - output wire grant_valid + input wire clk, + input wire reset, + input wire [NUM_REQS-1:0] requests, + input wire enable, + output wire [LOG_NUM_REQS-1:0] grant_index, + output wire [NUM_REQS-1:0] grant_onehot, + output wire grant_valid ); - if (N == 1) begin + if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -22,31 +25,34 @@ module VX_rr_arbiter #( end else begin - reg [`CLOG2(N)-1:0] grant_table [N-1:0]; - reg [`CLOG2(N)-1:0] state; - reg [N-1:0] grant_onehot_r; - + reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0]; + reg [LOG_NUM_REQS-1:0] state; + always @(*) begin - for (integer i = 0; i < N; i++) begin - grant_table[i] = `CLOG2(N)'(i); - for (integer j = 0; j < N; j++) begin - if (requests[(i+j) % N]) begin - grant_table[i] = `CLOG2(N)'((i+j) % N); + for (integer i = 0; i < NUM_REQS; i++) begin + grant_table[i] = LOG_NUM_REQS'(i); + for (integer j = 0; j < NUM_REQS; j++) begin + if (requests[(i+j) % NUM_REQS]) begin + grant_table[i] = LOG_NUM_REQS'((i+j) % NUM_REQS); end end end - grant_onehot_r = N'(0); - grant_onehot_r[grant_table[state]] = 1; end always @(posedge clk) begin if (reset) begin state <= 0; - end else begin + end else if (!LOCK_ENABLE || enable) begin state <= grant_table[state]; end end + reg [NUM_REQS-1:0] grant_onehot_r; + always @(*) begin + grant_onehot_r = NUM_REQS'(0); + grant_onehot_r[grant_table[state]] = 1; + end + assign grant_index = grant_table[state]; assign grant_onehot = grant_onehot_r; assign grant_valid = (| requests); diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v new file mode 100644 index 00000000..90bacadf --- /dev/null +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -0,0 +1,134 @@ +`include "VX_platform.vh" + +module VX_stream_arbiter #( + parameter NUM_REQS = 1, + parameter DATAW = 1, + parameter TYPE = "F", + parameter BUFFERED = 0 +) ( + input wire clk, + input wire reset, + + input wire [NUM_REQS-1:0] valid_in, + input wire [NUM_REQS-1:0][DATAW-1:0] data_in, + output wire [NUM_REQS-1:0] ready_in, + + output wire valid_out, + output wire [DATAW-1:0] data_out, + input wire ready_out + ); + localparam LOG_NUM_REQS = $clog2(NUM_REQS); + + if (NUM_REQS == 1) begin + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + assign valid_out = valid_in; + assign data_out = data_in; + assign ready_in = ready_out; + + end else begin + + wire sel_enable; + wire sel_valid; + wire [LOG_NUM_REQS-1:0] sel_idx; + wire [NUM_REQS-1:0] sel_1hot; + + if (TYPE == "X") begin + + VX_fixed_arbiter #( + .NUM_REQS(NUM_REQS), + .LOCK_ENABLE(1) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (valid_in), + .enable (sel_enable), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); + + end else if (TYPE == "R") begin + + VX_rr_arbiter #( + .NUM_REQS(NUM_REQS), + .LOCK_ENABLE(1) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (valid_in), + .enable (sel_enable), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); + + end else if (TYPE == "F") begin + + VX_fair_arbiter #( + .NUM_REQS(NUM_REQS), + .LOCK_ENABLE(1) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (valid_in), + .enable (sel_enable), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); + + end else if (TYPE == "M") begin + + VX_matrix_arbiter #( + .NUM_REQS(NUM_REQS), + .LOCK_ENABLE(1) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (valid_in), + .enable (sel_enable), + .grant_valid (sel_valid), + .grant_index (sel_idx), + .grant_onehot(sel_1hot) + ); + + end + + if (BUFFERED) begin + + wire stall = ~ready_out && valid_out; + assign sel_enable = ~stall; + + VX_generic_register #( + .N(1 + DATAW), + .R(1) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, data_in[sel_idx]}), + .out ({valid_out, data_out}) + ); + + for (genvar i = 0; i < NUM_REQS; i++) begin + assign ready_in[i] = sel_1hot[i] && ~stall; + end + + end else begin + + assign sel_enable = ready_out; + + assign valid_out = sel_valid; + assign data_out = data_in[sel_idx]; + + for (genvar i = 0; i < NUM_REQS; i++) begin + assign ready_in[i] = sel_1hot[i] && ready_out; + end + end + end + +endmodule \ No newline at end of file