diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index fc845c0a..2864684e 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -5,13 +5,14 @@ module VX_mem_arb #( parameter DATA_WIDTH = 1, parameter ADDR_WIDTH = 1, parameter TAG_IN_WIDTH = 1, + parameter TAG_SEL_IDX = 0, parameter BUFFERED_REQ = 0, parameter BUFFERED_RSP = 0, parameter TYPE = "R", - parameter DATA_SIZE = (DATA_WIDTH / 8), - parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), - parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS + localparam DATA_SIZE = (DATA_WIDTH / 8), + localparam LOG_NUM_REQS = `CLOG2(NUM_REQS), + localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS ) ( input wire clk, input wire reset, @@ -52,8 +53,21 @@ module VX_mem_arb #( if (NUM_REQS > 1) begin wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged; + for (genvar i = 0; i < NUM_REQS; i++) begin - assign req_data_in_merged[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; + wire [TAG_OUT_WIDTH-1:0] req_tag_in_w; + + VX_bits_insert #( + .N (TAG_IN_WIDTH), + .S (LOG_NUM_REQS), + .POS (TAG_SEL_IDX) + ) bits_insert ( + .data_in (req_tag_in[i]), + .sel_in (LOG_NUM_REQS'(i)), + .data_out (req_tag_in_w) + ); + + assign req_data_in_merged[i] = {req_tag_in_w, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; end VX_stream_arbiter #( @@ -74,12 +88,20 @@ module VX_mem_arb #( /////////////////////////////////////////////////////////////////////// - wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0]; - wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged; - for (genvar i = 0; i < NUM_REQS; i++) begin - assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i]; - end + + wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS]; + + wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w; + + VX_bits_remove #( + .N (TAG_OUT_WIDTH), + .S (LOG_NUM_REQS), + .POS (TAG_SEL_IDX) + ) bits_remove ( + .data_in (rsp_tag_in), + .data_out (rsp_tag_in_w) 
+ ); VX_stream_demux #( .NUM_REQS (NUM_REQS), @@ -88,14 +110,18 @@ module VX_mem_arb #( ) rsp_demux ( .clk (clk), .reset (reset), - .sel (rsp_sel), + .sel_in (rsp_sel), .valid_in (rsp_valid_in), - .data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}), + .data_in ({rsp_tag_in_w, rsp_data_in}), .ready_in (rsp_ready_in), .valid_out (rsp_valid_out), .data_out (rsp_data_out_merged), .ready_out (rsp_ready_out) - ); + ); + + for (genvar i = 0; i < NUM_REQS; i++) begin + assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i]; + end end else begin diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 4cbc1266..3c26f6c7 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -197,17 +197,49 @@ module VX_mem_unit # ( .TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) ) smem_rsp_if(); - VX_smem_arb smem_arb ( + VX_smem_arb #( + .NUM_REQS (2), + .LANES (`NUM_THREADS), + .DATA_SIZE (4), + .TAG_IN_WIDTH (`DCORE_TAG_WIDTH), + .TYPE ("X"), + .BUFFERED_REQ (2), + .BUFFERED_RSP (1) + ) smem_arb ( .clk (clk), .reset (reset), - .core_req_if (dcache_req_if), - .cache_req_if (dcache_req_tmp_if), - .smem_req_if (smem_req_if), + // input request + .req_valid_in (dcache_req_if.valid), + .req_rw_in (dcache_req_if.rw), + .req_byteen_in (dcache_req_if.byteen), + .req_addr_in (dcache_req_if.addr), + .req_data_in (dcache_req_if.data), + .req_tag_in (dcache_req_if.tag), + .req_ready_in (dcache_req_if.ready), + + // output requests + .req_valid_out ({smem_req_if.valid, dcache_req_tmp_if.valid}), + .req_rw_out ({smem_req_if.rw, dcache_req_tmp_if.rw}), + .req_byteen_out ({smem_req_if.byteen, dcache_req_tmp_if.byteen}), + .req_addr_out ({smem_req_if.addr, dcache_req_tmp_if.addr}), + .req_data_out ({smem_req_if.data, dcache_req_tmp_if.data}), + .req_tag_out ({smem_req_if.tag, dcache_req_tmp_if.tag}), + .req_ready_out ({smem_req_if.ready, dcache_req_tmp_if.ready}), + + // input responses + .rsp_valid_in ({smem_rsp_if.valid, dcache_rsp_tmp_if.valid}), + .rsp_tmask_in 
({smem_rsp_if.tmask, dcache_rsp_tmp_if.tmask}), + .rsp_data_in ({smem_rsp_if.data, dcache_rsp_tmp_if.data}), + .rsp_tag_in ({smem_rsp_if.tag, dcache_rsp_tmp_if.tag}), + .rsp_ready_in ({smem_rsp_if.ready, dcache_rsp_tmp_if.ready}), - .cache_rsp_if (dcache_rsp_tmp_if), - .smem_rsp_if (smem_rsp_if), - .core_rsp_if (dcache_rsp_if) + // output response + .rsp_valid_out (dcache_rsp_if.valid), + .rsp_tmask_out (dcache_rsp_if.tmask), + .rsp_tag_out (dcache_rsp_if.tag), + .rsp_data_out (dcache_rsp_if.data), + .rsp_ready_out (dcache_rsp_if.ready) ); `RESET_RELAY (smem_reset); diff --git a/hw/rtl/VX_smem_arb.v b/hw/rtl/VX_smem_arb.v index 1c62117c..45033f5c 100644 --- a/hw/rtl/VX_smem_arb.v +++ b/hw/rtl/VX_smem_arb.v @@ -1,73 +1,160 @@ `include "VX_define.vh" -module VX_smem_arb ( - input wire clk, - input wire reset, +module VX_smem_arb #( + parameter NUM_REQS = 1, + parameter LANES = 1, + parameter DATA_SIZE = 1, + parameter TAG_IN_WIDTH = 1, + parameter TAG_SEL_IDX = 0, + parameter BUFFERED_REQ = 0, + parameter BUFFERED_RSP = 0, + parameter TYPE = "R", + + localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)), + localparam DATA_WIDTH = (8 * DATA_SIZE), + localparam LOG_NUM_REQS = `CLOG2(NUM_REQS), + localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS +) ( + input wire clk, + input wire reset, // input request - VX_dcache_req_if core_req_if, + input wire [LANES-1:0] req_valid_in, + input wire [LANES-1:0] req_rw_in, + input wire [LANES-1:0][DATA_SIZE-1:0] req_byteen_in, + input wire [LANES-1:0][ADDR_WIDTH-1:0] req_addr_in, + input wire [LANES-1:0][DATA_WIDTH-1:0] req_data_in, + input wire [LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_in, + output wire [LANES-1:0] req_ready_in, - // output requests - VX_dcache_req_if cache_req_if, - VX_dcache_req_if smem_req_if, + // output requests + output wire [NUM_REQS-1:0][LANES-1:0] req_valid_out, + output wire [NUM_REQS-1:0][LANES-1:0] req_rw_out, + output wire [NUM_REQS-1:0][LANES-1:0][DATA_SIZE-1:0] req_byteen_out, + output wire 
[NUM_REQS-1:0][LANES-1:0][ADDR_WIDTH-1:0] req_addr_out, + output wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] req_data_out, + output wire [NUM_REQS-1:0][LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_out, + input wire [NUM_REQS-1:0][LANES-1:0] req_ready_out, // input responses - VX_dcache_rsp_if cache_rsp_if, - VX_dcache_rsp_if smem_rsp_if, + input wire [NUM_REQS-1:0] rsp_valid_in, + input wire [NUM_REQS-1:0][LANES-1:0] rsp_tmask_in, + input wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] rsp_data_in, + input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] rsp_tag_in, + output wire [NUM_REQS-1:0] rsp_ready_in, // output response - VX_dcache_rsp_if core_rsp_if -); - localparam REQ_DATAW = `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + (`DCORE_TAG_WIDTH-1); - localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; + output wire rsp_valid_out, + output wire [LANES-1:0] rsp_tmask_out, + output wire [LANES-1:0][DATA_WIDTH-1:0] rsp_data_out, + output wire [TAG_IN_WIDTH-1:0] rsp_tag_out, + input wire rsp_ready_out +); + localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam RSP_DATAW = LANES * (1 + DATA_WIDTH) + TAG_IN_WIDTH; - // - // handle requests - // + if (NUM_REQS > 1) begin - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + wire [LANES-1:0][REQ_DATAW-1:0] req_data_in_merged; + wire [NUM_REQS-1:0][LANES-1:0][REQ_DATAW-1:0] req_data_out_merged; + + wire [LANES-1:0][LOG_NUM_REQS-1:0] req_sel; + wire [LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_in_w; + + for (genvar i = 0; i < LANES; ++i) begin + assign req_sel[i] = req_tag_in[i][TAG_SEL_IDX +: LOG_NUM_REQS]; + + VX_bits_remove #( + .N (TAG_IN_WIDTH), + .S (LOG_NUM_REQS), + .POS (TAG_SEL_IDX) + ) bits_remove ( + .data_in (req_tag_in[i]), + .data_out (req_tag_in_w[i]) + ); + + assign req_data_in_merged[i] = {req_tag_in_w[i], req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; + end - wire [1:0][REQ_DATAW-1:0] req_data_out; - VX_stream_demux #( - 
.NUM_REQS (2), + .NUM_REQS (NUM_REQS), + .LANES (LANES), .DATAW (REQ_DATAW), - .BUFFERED (2) + .BUFFERED (BUFFERED_REQ) ) req_demux ( .clk (clk), .reset (reset), - .sel (core_req_if.tag[i][0]), - .valid_in (core_req_if.valid[i]), - .data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}), - .ready_in (core_req_if.ready[i]), - .valid_out ({smem_req_if.valid[i], cache_req_if.valid[i]}), - .data_out (req_data_out), - .ready_out ({smem_req_if.ready[i], cache_req_if.ready[i]}) - ); + .sel_in (req_sel), + .valid_in (req_valid_in), + .data_in (req_data_in_merged), + .ready_in (req_ready_in), + .valid_out (req_valid_out), + .data_out (req_data_out_merged), + .ready_out (req_ready_out) + ); + + for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar j = 0; j < LANES; ++j) begin + assign {req_tag_out[i][j], req_addr_out[i][j], req_rw_out[i][j], req_byteen_out[i][j], req_data_out[i][j]} = req_data_out_merged[i][j]; + end + end + + /////////////////////////////////////////////////////////////////////// + + wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in_merged; + + for (genvar i = 0; i < NUM_REQS; i++) begin + wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w; + + VX_bits_insert #( + .N (TAG_OUT_WIDTH), + .S (LOG_NUM_REQS), + .POS (TAG_SEL_IDX) + ) bits_insert ( + .data_in (rsp_tag_in[i]), + .sel_in (LOG_NUM_REQS'(i)), + .data_out (rsp_tag_in_w) + ); + + assign rsp_data_in_merged[i] = {rsp_tag_in_w, rsp_tmask_in[i], rsp_data_in[i]}; + end + + VX_stream_arbiter #( + .NUM_REQS (NUM_REQS), + .LANES (1), + .DATAW (RSP_DATAW), + .BUFFERED (BUFFERED_RSP), + .TYPE (TYPE) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (rsp_valid_in), + .data_in (rsp_data_in_merged), + .ready_in (rsp_ready_in), + .valid_out (rsp_valid_out), + .data_out ({rsp_tag_out, rsp_tmask_out, rsp_data_out}), + .ready_out (rsp_ready_out) + ); + + end else begin + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + assign req_valid_out = 
req_valid_in; + assign req_tag_out = req_tag_in; + assign req_addr_out = req_addr_in; + assign req_rw_out = req_rw_in; + assign req_byteen_out = req_byteen_in; + assign req_data_out = req_data_in; + assign req_ready_in = req_ready_out; + + assign rsp_valid_out = rsp_valid_in; + assign rsp_tmask_out = rsp_tmask_in; + assign rsp_tag_out = rsp_tag_in; + assign rsp_data_out = rsp_data_in; + assign rsp_ready_in = rsp_ready_out; - assign {cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]} = req_data_out[0]; - assign {smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]} = req_data_out[1]; end - // - // handle responses - // - - VX_stream_arbiter #( - .NUM_REQS (2), - .DATAW (RSP_DATAW), - .TYPE ("X"), - .BUFFERED (1) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in ({smem_rsp_if.valid, cache_rsp_if.valid}), - .data_in ({{smem_rsp_if.tmask, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}}, - {cache_rsp_if.tmask, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}}), - .ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}), - .valid_out (core_rsp_if.valid), - .data_out ({core_rsp_if.tmask, core_rsp_if.data, core_rsp_if.tag}), - .ready_out (core_rsp_if.ready) - ); - endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index 594963cc..c01e17f0 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -2,6 +2,7 @@ module VX_stream_arbiter #( parameter NUM_REQS = 1, + parameter LANES = 1, parameter DATAW = 1, parameter TYPE = "R", parameter LOCK_ENABLE = 1, @@ -10,21 +11,36 @@ module VX_stream_arbiter #( input wire clk, input wire reset, - input wire [NUM_REQS-1:0] valid_in, - input wire [NUM_REQS-1:0][DATAW-1:0] data_in, - output wire [NUM_REQS-1:0] ready_in, + input wire [NUM_REQS-1:0][LANES-1:0] valid_in, + input wire [NUM_REQS-1:0][LANES-1:0][DATAW-1:0] data_in, + output wire 
[NUM_REQS-1:0][LANES-1:0] ready_in, - output wire valid_out, - output wire [DATAW-1:0] data_out, - input wire ready_out + output wire [LANES-1:0] valid_out, + output wire [LANES-1:0][DATAW-1:0] data_out, + input wire [LANES-1:0] ready_out ); localparam LOG_NUM_REQS = $clog2(NUM_REQS); if (NUM_REQS > 1) begin - wire sel_valid; - wire sel_ready; - wire [NUM_REQS-1:0] sel_1hot; + wire sel_valid; + wire sel_ready; + wire [NUM_REQS-1:0] sel_1hot; + + wire [NUM_REQS-1:0] valid_in_any; + wire [LANES-1:0] ready_in_sel; + + if (LANES > 1) begin + for (genvar i = 0; i < NUM_REQS; i++) begin + assign valid_in_any[i] = (| valid_in[i]); + end + assign sel_ready = (| ready_in_sel); + end else begin + for (genvar i = 0; i < NUM_REQS; i++) begin + assign valid_in_any[i] = valid_in[i]; + end + assign sel_ready = ready_in_sel; + end if (TYPE == "X") begin VX_fixed_arbiter #( @@ -33,7 +49,7 @@ module VX_stream_arbiter #( ) sel_arb ( .clk (clk), .reset (reset), - .requests (valid_in), + .requests (valid_in_any), .enable (sel_ready), .grant_valid (sel_valid), .grant_onehot (sel_1hot), @@ -46,7 +62,7 @@ module VX_stream_arbiter #( ) sel_arb ( .clk (clk), .reset (reset), - .requests (valid_in), + .requests (valid_in_any), .enable (sel_ready), .grant_valid (sel_valid), .grant_onehot (sel_1hot), @@ -59,7 +75,7 @@ module VX_stream_arbiter #( ) sel_arb ( .clk (clk), .reset (reset), - .requests (valid_in), + .requests (valid_in_any), .enable (sel_ready), .grant_valid (sel_valid), .grant_onehot (sel_1hot), @@ -72,7 +88,7 @@ module VX_stream_arbiter #( ) sel_arb ( .clk (clk), .reset (reset), - .requests (valid_in), + .requests (valid_in_any), .enable (sel_ready), .grant_valid (sel_valid), .grant_onehot (sel_1hot), @@ -82,34 +98,58 @@ module VX_stream_arbiter #( $error ("invalid parameter"); end - wire [DATAW-1:0] data_in_sel; + wire [LANES-1:0] valid_in_sel; + wire [LANES-1:0][DATAW-1:0] data_in_sel; - VX_onehot_mux #( - .DATAW (DATAW), - .N (NUM_REQS) - ) data_in_mux ( - .data_in (data_in), - 
.sel_in (sel_1hot), - .data_out (data_in_sel) - ); + if (LANES > 1) begin + wire [NUM_REQS-1:0][(LANES * (1 + DATAW))-1:0] valid_data_in; - VX_skid_buffer #( - .DATAW (DATAW), - .PASSTHRU (0 == BUFFERED), - .OUTPUT_REG (2 == BUFFERED) - ) out_buffer ( - .clk (clk), - .reset (reset), - .valid_in (sel_valid), - .data_in (data_in_sel), - .ready_in (sel_ready), - .valid_out (valid_out), - .data_out (data_out), - .ready_out (ready_out) - ); + for (genvar i = 0; i < NUM_REQS; i++) begin + assign valid_data_in[i] = {valid_in[i], data_in[i]}; + end + + VX_onehot_mux #( + .DATAW (LANES * (1 + DATAW)), + .N (NUM_REQS) + ) data_in_mux ( + .data_in (valid_data_in), + .sel_in (sel_1hot), + .data_out ({valid_in_sel, data_in_sel}) + ); + + `UNUSED_VAR (sel_valid) + end else begin + VX_onehot_mux #( + .DATAW (DATAW), + .N (NUM_REQS) + ) data_in_mux ( + .data_in (data_in), + .sel_in (sel_1hot), + .data_out (data_in_sel) + ); + + assign valid_in_sel = sel_valid; + end for (genvar i = 0; i < NUM_REQS; i++) begin - assign ready_in[i] = sel_1hot[i] && sel_ready; + assign ready_in[i] = ready_in_sel & {LANES{sel_1hot[i]}}; + end + + for (genvar i = 0; i < LANES; ++i) begin + VX_skid_buffer #( + .DATAW (DATAW), + .PASSTHRU (0 == BUFFERED), + .OUTPUT_REG (2 == BUFFERED) + ) out_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in_sel[i]), + .data_in (data_in_sel[i]), + .ready_in (ready_in_sel[i]), + .valid_out (valid_out[i]), + .data_out (data_out[i]), + .ready_out (ready_out[i]) + ); end end else begin diff --git a/hw/rtl/libs/VX_stream_demux.v b/hw/rtl/libs/VX_stream_demux.v index f9d53d7e..9d6038c3 100644 --- a/hw/rtl/libs/VX_stream_demux.v +++ b/hw/rtl/libs/VX_stream_demux.v @@ -2,6 +2,7 @@ module VX_stream_demux #( parameter NUM_REQS = 1, + parameter LANES = 1, parameter DATAW = 1, parameter BUFFERED = 0, localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS) @@ -9,60 +10,58 @@ module VX_stream_demux #( input wire clk, input wire reset, - input wire [LOG_NUM_REQS-1:0] sel, + input wire 
[LANES-1:0][LOG_NUM_REQS-1:0] sel_in, - input wire valid_in, - input wire [DATAW-1:0] data_in, - output wire ready_in, + input wire [LANES-1:0] valid_in, + input wire [LANES-1:0][DATAW-1:0] data_in, + output wire [LANES-1:0] ready_in, - output wire [NUM_REQS-1:0] valid_out, - output wire [NUM_REQS-1:0][DATAW-1:0] data_out, - input wire [NUM_REQS-1:0] ready_out + output wire [NUM_REQS-1:0][LANES-1:0] valid_out, + output wire [NUM_REQS-1:0][LANES-1:0][DATAW-1:0] data_out, + input wire [NUM_REQS-1:0][LANES-1:0] ready_out ); if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] valid_out_unqual; - wire [NUM_REQS-1:0][DATAW-1:0] data_out_unqual; - wire [NUM_REQS-1:0] ready_out_unqual; + for (genvar j = 0; j < LANES; ++j) begin - always @(*) begin - valid_out_unqual = '0; - valid_out_unqual[sel] = valid_in; - end - - for (genvar i = 0; i < NUM_REQS; i++) begin - assign data_out_unqual[i] = data_in; - end - - assign ready_in = ready_out_unqual[sel]; + reg [NUM_REQS-1:0] valid_in_sel; + wire [NUM_REQS-1:0] ready_in_sel; - for (genvar i = 0; i < NUM_REQS; i++) begin - VX_skid_buffer #( - .DATAW (DATAW), - .PASSTHRU (0 == BUFFERED), - .OUTPUT_REG (2 == BUFFERED) - ) out_buffer ( - .clk (clk), - .reset (reset), - .valid_in (valid_out_unqual[i]), - .data_in (data_out_unqual[i]), - .ready_in (ready_out_unqual[i]), - .valid_out (valid_out[i]), - .data_out (data_out[i]), - .ready_out (ready_out[i]) - ); + always @(*) begin + valid_in_sel = '0; + valid_in_sel[sel_in[j]] = valid_in[j]; + end + + assign ready_in[j] = ready_in_sel[sel_in[j]]; + + for (genvar i = 0; i < NUM_REQS; i++) begin /* one skid buffer per output port i of lane j; 'begin' pairs with the added 'end' below */ + VX_skid_buffer #( + .DATAW (DATAW), + .PASSTHRU (0 == BUFFERED), + .OUTPUT_REG (2 == BUFFERED) + ) out_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in_sel[i]), + .data_in (data_in[j]), + .ready_in (ready_in_sel[i]), + .valid_out (valid_out[i][j]), + .data_out (data_out[i][j]), + .ready_out (ready_out[i][j]) + ); + end end end else begin - + `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR 
(sel) + `UNUSED_VAR (sel_in) assign valid_out = valid_in; assign data_out = data_in; - assign ready_in = ready_out; + assign ready_in = ready_out; end