cache req datapath optimizations

This commit is contained in:
Blaise Tine
2020-12-08 02:58:08 -08:00
parent 268ad15098
commit d5fa82f5e4
17 changed files with 393 additions and 410 deletions

View File

@@ -149,8 +149,6 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
ofs << "$upscope $end" << std::endl; ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl; ofs << "enddefinitions $end" << std::endl;
std::cout << "OK" << std::flush << std::endl;
uint64_t frame_width, max_frames, data_valid, offset, delta; uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0; uint64_t timestamp = 0;
uint64_t frame_offset = 0; uint64_t frame_offset = 0;
@@ -167,8 +165,6 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
std::this_thread::sleep_for(std::chrono::seconds(1)); std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true); } while (true);
std::cout << "OK" << std::flush << std::endl;
// get frame width // get frame width
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH)); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
@@ -239,7 +235,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
signal_id = num_taps; signal_id = num_taps;
if (0 == (frame_no % FRAME_FLUSH_SIZE)) { if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
ofs << std::flush; ofs << std::flush;
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::flush << std::endl; std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
} }
} }
} }

View File

@@ -548,7 +548,7 @@ VX_mem_arb #(
.ADDR_WIDTH ($bits(t_local_mem_addr)), .ADDR_WIDTH ($bits(t_local_mem_addr)),
.TAG_IN_WIDTH (AVS_REQ_TAGW), .TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1) .TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
) vx_cci_avs_arb ( ) dram_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -204,17 +204,17 @@ module VX_cluster #(
.req_tag_out (io_req_tag), .req_tag_out (io_req_tag),
.req_ready_out (io_req_ready), .req_ready_out (io_req_ready),
// input responses // input response
.rsp_valid_in (per_core_io_rsp_valid), .rsp_valid_in (io_rsp_valid),
.rsp_data_in (per_core_io_rsp_data), .rsp_tag_in (io_rsp_tag),
.rsp_tag_in (per_core_io_rsp_tag), .rsp_data_in (io_rsp_data),
.rsp_ready_in (per_core_io_rsp_ready), .rsp_ready_in (io_rsp_ready),
// output response // output responses
.rsp_valid_out (io_rsp_valid), .rsp_valid_out (per_core_io_rsp_valid),
.rsp_tag_out (io_rsp_tag), .rsp_data_out (per_core_io_rsp_data),
.rsp_data_out (io_rsp_data), .rsp_tag_out (per_core_io_rsp_tag),
.rsp_ready_out (io_rsp_ready) .rsp_ready_out (per_core_io_rsp_ready)
); );
VX_csr_io_arb #( VX_csr_io_arb #(
@@ -298,35 +298,30 @@ module VX_cluster #(
if (`L2_ENABLE) begin if (`L2_ENABLE) begin
wire [`NUM_CORES-1:0] core_dram_rsp_valid; wire [`NUM_CORES-1:0] per_core_dram_req_valid_qual;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data; wire [`NUM_CORES-1:0] per_core_dram_req_rw_qual;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag; wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_dram_req_byteen_qual;
wire core_dram_rsp_ready; wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_dram_req_addr_qual;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_req_data_qual;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_req_tag_qual;
wire [`NUM_CORES-1:0] per_core_dram_req_ready_qual;
reg [`NUM_CORES-1:0] core_dram_rsp_ready_other; for (genvar i = 0; i < `NUM_CORES; i++) begin
always @(*) begin VX_skid_buffer #(
core_dram_rsp_ready_other = {`NUM_CORES{1'b1}}; .DATAW (1 + `DDRAM_BYTEEN_WIDTH + `DDRAM_ADDR_WIDTH + `DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH),
for (integer i = 0; i < `NUM_CORES; i++) begin .PASSTHRU (`NUM_CORES < 4)
for (integer j = 0; j < `NUM_CORES; j++) begin ) dram_req_buffer (
if (i != j) begin .clk (clk),
core_dram_rsp_ready_other[i] &= (per_core_dram_rsp_ready [j] | !core_dram_rsp_valid [j]); .reset (reset),
end .valid_in (per_core_dram_req_valid[i]),
end .data_in ({per_core_dram_req_rw[i], per_core_dram_req_byteen[i], per_core_dram_req_addr[i], per_core_dram_req_data[i], per_core_dram_req_tag[i]}),
end .ready_in (per_core_dram_req_ready[i]),
.valid_out (per_core_dram_req_valid_qual[i]),
.data_out ({per_core_dram_req_rw_qual[i], per_core_dram_req_byteen_qual[i], per_core_dram_req_addr_qual[i], per_core_dram_req_data_qual[i], per_core_dram_req_tag_qual[i]}),
.ready_out (per_core_dram_req_ready_qual[i])
);
end end
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_dram_rsp_valid [i] = core_dram_rsp_valid[i] & core_dram_rsp_ready_other [i];
assign per_core_dram_rsp_data [i] = core_dram_rsp_data[i];
assign per_core_dram_rsp_tag [i] = core_dram_rsp_tag[i];
end
assign core_dram_rsp_ready = & (per_core_dram_rsp_ready | ~core_dram_rsp_valid);
wire core_dram_req_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_dram_req_ready[i] = core_dram_req_ready;
end
VX_cache #( VX_cache #(
.CACHE_ID (`L2CACHE_ID), .CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE), .CACHE_SIZE (`L2CACHE_SIZE),
@@ -355,19 +350,19 @@ module VX_cluster #(
.reset (reset), .reset (reset),
// Core request // Core request
.core_req_valid (per_core_dram_req_valid), .core_req_valid (per_core_dram_req_valid_qual),
.core_req_rw (per_core_dram_req_rw), .core_req_rw (per_core_dram_req_rw_qual),
.core_req_byteen (per_core_dram_req_byteen), .core_req_byteen (per_core_dram_req_byteen_qual),
.core_req_addr (per_core_dram_req_addr), .core_req_addr (per_core_dram_req_addr_qual),
.core_req_data (per_core_dram_req_data), .core_req_data (per_core_dram_req_data_qual),
.core_req_tag (per_core_dram_req_tag), .core_req_tag (per_core_dram_req_tag_qual),
.core_req_ready (core_dram_req_ready), .core_req_ready (per_core_dram_req_ready_qual),
// Core response // Core response
.core_rsp_valid (core_dram_rsp_valid), .core_rsp_valid (per_core_dram_rsp_valid),
.core_rsp_data (core_dram_rsp_data), .core_rsp_data (per_core_dram_rsp_data),
.core_rsp_tag (core_dram_rsp_tag), .core_rsp_tag (per_core_dram_rsp_tag),
.core_rsp_ready (core_dram_rsp_ready), .core_rsp_ready (per_core_dram_rsp_ready),
// DRAM request // DRAM request
.dram_req_valid (dram_req_valid), .dram_req_valid (dram_req_valid),

View File

@@ -4,14 +4,14 @@ module VX_csr_io_arb #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1, parameter DATA_WIDTH = 1,
parameter DATA_SIZE = (DATA_WIDTH / 8), parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `LOG2UP(NUM_REQS) parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire [REQS_BITS-1:0] request_id, input wire [LOG_NUM_REQS-1:0] request_id,
// input requests // input requests
input wire req_valid_in, input wire req_valid_in,
@@ -40,7 +40,7 @@ module VX_csr_io_arb #(
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign req_valid_out[i] = req_valid_in && (request_id == `REQS_BITS'(i)); assign req_valid_out[i] = req_valid_in && (request_id == LOG_NUM_REQS'(i));
assign req_addr_out[i] = req_addr_in; assign req_addr_out[i] = req_addr_in;
assign req_rw_out[i] = req_rw_in; assign req_rw_out[i] = req_rw_in;
assign req_data_out[i] = req_data_in; assign req_data_out[i] = req_data_in;
@@ -50,8 +50,6 @@ module VX_csr_io_arb #(
end else begin end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (request_id) `UNUSED_VAR (request_id)
assign req_valid_out = req_valid_in; assign req_valid_out = req_valid_in;
@@ -64,36 +62,17 @@ module VX_csr_io_arb #(
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// Inputs buffering
wire [NUM_REQS-1:0] rsp_valid_in_qual;
wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_in_qual;
wire [NUM_REQS-1:0] rsp_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATA_WIDTH),
.PASSTHRU (NUM_REQS < 4)
) rsp_buffer (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in[i]),
.data_in (rsp_data_in[i]),
.ready_in (rsp_ready_in[i]),
.valid_out (rsp_valid_in_qual[i]),
.data_out (rsp_data_in_qual[i]),
.ready_out (rsp_ready_in_qual[i])
);
end
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS (NUM_REQS),
.DATAW(DATA_WIDTH), .DATAW (DATA_WIDTH),
.BUFFERED(NUM_REQS >= 4) .IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (rsp_valid_in_qual), .valid_in (rsp_valid_in),
.data_in (rsp_data_in_qual), .data_in (rsp_data_in),
.ready_in (rsp_ready_in_qual), .ready_in (rsp_ready_in),
.valid_out (rsp_valid_out), .valid_out (rsp_valid_out),
.data_out (rsp_data_out), .data_out (rsp_data_out),
.ready_out (rsp_ready_out) .ready_out (rsp_ready_out)

View File

@@ -6,9 +6,9 @@ module VX_databus_arb #(
parameter TAG_IN_WIDTH = 1, parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1, parameter TAG_OUT_WIDTH = 1,
parameter WORD_WIDTH = WORD_SIZE * 8, parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQS) parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -32,64 +32,42 @@ module VX_databus_arb #(
input wire req_ready_out, input wire req_ready_out,
// input response // input response
output wire [NUM_REQS-1:0] rsp_valid_in, input wire rsp_valid_in,
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_in, input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_in, input wire [WORD_WIDTH-1:0] rsp_data_in,
input wire [NUM_REQS-1:0] rsp_ready_in, output wire rsp_ready_in,
// output response // output responses
input wire rsp_valid_out, output wire [NUM_REQS-1:0] rsp_valid_out,
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_out, output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
input wire [WORD_WIDTH-1:0] rsp_data_out, output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
output wire rsp_ready_out input wire [NUM_REQS-1:0] rsp_ready_out
); );
localparam DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH); localparam DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0] valids; wire [NUM_REQS-1:0] valids;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valids[i] = (| req_valid_in[i]);
end
wire [NUM_REQS-1:0][DATAW-1:0] data_in; wire [NUM_REQS-1:0][DATAW-1:0] data_in;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], REQS_BITS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
// Inputs buffering
wire [NUM_REQS-1:0] req_valid_in_qual;
wire [NUM_REQS-1:0][DATAW-1:0] req_data_in_qual;
wire [NUM_REQS-1:0] req_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (NUM_REQS < 4)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (valids[i]),
.data_in (data_in[i]),
.ready_in (req_ready_in[i]),
.valid_out (req_valid_in_qual[i]),
.data_out (req_data_in_qual[i]),
.ready_out (req_ready_in_qual[i])
);
end
wire [`NUM_THREADS-1:0] req_tmask_out; wire [`NUM_THREADS-1:0] req_tmask_out;
wire req_valid_out_unqual; wire req_valid_out_unqual;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valids[i] = (| req_valid_in[i]);
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.DATAW (DATAW), .DATAW (DATAW),
.BUFFERED (NUM_REQS >= 4) .IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) req_arb ( ) req_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (req_valid_in_qual), .valid_in (valids),
.data_in (req_data_in_qual), .data_in (data_in),
.ready_in (req_ready_in_qual), .ready_in (req_ready_in),
.valid_out (req_valid_out_unqual), .valid_out (req_valid_out_unqual),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}), .data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out) .ready_out (req_ready_out)
@@ -99,15 +77,15 @@ module VX_databus_arb #(
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_out[REQS_BITS-1:0]; wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign rsp_valid_in[i] = rsp_valid_out && (rsp_sel == REQS_BITS'(i)); assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
assign rsp_tag_in[i] = rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; assign rsp_tag_out[i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
assign rsp_data_in[i] = rsp_data_out; assign rsp_data_out[i] = rsp_data_in;
end end
assign rsp_ready_out = rsp_ready_in[rsp_sel]; assign rsp_ready_in = rsp_ready_out[rsp_sel];
end else begin end else begin
@@ -122,10 +100,10 @@ module VX_databus_arb #(
assign req_data_out = req_data_in; assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out; assign req_ready_in = req_ready_out;
assign rsp_valid_in = rsp_valid_out; assign rsp_valid_out = rsp_valid_in;
assign rsp_tag_in = rsp_tag_out; assign rsp_tag_out = rsp_tag_in;
assign rsp_data_in = rsp_data_out; assign rsp_data_out = rsp_data_in;
assign rsp_ready_out = rsp_ready_in; assign rsp_ready_in = rsp_ready_out;
end end

View File

@@ -36,7 +36,7 @@ module VX_dcache_arb (
wire core_req_valid; wire core_req_valid;
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (REQ_DATAW) .DATAW (REQ_DATAW)
) req_buffer ( ) req_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -121,9 +121,10 @@ module VX_dcache_arb (
assign rsp_valid_in[2] = (| io_rsp_if.valid); assign rsp_valid_in[2] = (| io_rsp_if.valid);
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS (3), .NUM_REQS (3),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.BUFFERED (1) .IN_BUFFER (1),
.OUT_BUFFER (1)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -138,6 +139,7 @@ module VX_dcache_arb (
assign cache_rsp_if.ready = rsp_ready_in[0]; assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1]; assign smem_rsp_if.ready = rsp_ready_in[1];
assign io_rsp_if.ready = rsp_ready_in[2]; assign io_rsp_if.ready = rsp_ready_in[2];
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}}; assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule endmodule

View File

@@ -6,9 +6,9 @@ module VX_mem_arb #(
parameter TAG_IN_WIDTH = 1, parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1, parameter TAG_OUT_WIDTH = 1,
parameter DATA_SIZE = (DATA_WIDTH / 8), parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQS) parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -43,45 +43,27 @@ module VX_mem_arb #(
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out, output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out input wire [NUM_REQS-1:0] rsp_ready_out
); );
localparam DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_IN_WIDTH + DATA_WIDTH;
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0][DATAW-1:0] data_in; wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign data_in[i] = {{req_tag_in[i], REQS_BITS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; assign data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
// Inputs buffering
wire [NUM_REQS-1:0] req_valid_in_qual;
wire [NUM_REQS-1:0][DATAW-1:0] req_data_in_qual;
wire [NUM_REQS-1:0] req_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (NUM_REQS < 4)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_valid_in[i]),
.data_in (data_in[i]),
.ready_in (req_ready_in[i]),
.valid_out (req_valid_in_qual[i]),
.data_out (req_data_in_qual[i]),
.ready_out (req_ready_in_qual[i])
);
end end
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.DATAW (DATAW), .DATAW (REQ_DATAW),
.BUFFERED (NUM_REQS >= 4) .IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) req_arb ( ) req_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (req_valid_in_qual), .valid_in (req_valid_in),
.data_in (req_data_in_qual), .data_in (data_in),
.ready_in (req_ready_in_qual), .ready_in (req_ready_in),
.valid_out (req_valid_out), .valid_out (req_valid_out),
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}), .data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out) .ready_out (req_ready_out)
@@ -89,15 +71,15 @@ module VX_mem_arb #(
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in [REQS_BITS-1:0]; wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0];
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i)); assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
assign rsp_tag_out [i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH]; assign rsp_tag_out [i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
assign rsp_data_out [i] = rsp_data_in; assign rsp_data_out [i] = rsp_data_in;
end end
assign rsp_ready_in = rsp_ready_out [rsp_sel]; assign rsp_ready_in = rsp_ready_out [rsp_sel];
end else begin end else begin

View File

@@ -206,17 +206,17 @@ module Vortex (
.req_tag_out (io_req_tag), .req_tag_out (io_req_tag),
.req_ready_out (io_req_ready), .req_ready_out (io_req_ready),
// input responses // input response
.rsp_valid_in (per_cluster_io_rsp_valid), .rsp_valid_in (io_rsp_valid),
.rsp_data_in (per_cluster_io_rsp_data), .rsp_tag_in (io_rsp_tag),
.rsp_tag_in (per_cluster_io_rsp_tag), .rsp_data_in (io_rsp_data),
.rsp_ready_in (per_cluster_io_rsp_ready), .rsp_ready_in (io_rsp_ready),
// output response // output responses
.rsp_valid_out (io_rsp_valid), .rsp_valid_out (per_cluster_io_rsp_valid),
.rsp_tag_out (io_rsp_tag), .rsp_data_out (per_cluster_io_rsp_data),
.rsp_data_out (io_rsp_data), .rsp_tag_out (per_cluster_io_rsp_tag),
.rsp_ready_out (io_rsp_ready) .rsp_ready_out (per_cluster_io_rsp_ready)
); );
VX_csr_io_arb #( VX_csr_io_arb #(
@@ -300,36 +300,30 @@ module Vortex (
if (`L3_ENABLE) begin if (`L3_ENABLE) begin
wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag; wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen_qual;
wire cluster_dram_rsp_ready; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag_qual;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready_qual;
reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other; for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
always @(*) begin VX_skid_buffer #(
cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}}; .DATAW (1 + `L2DRAM_BYTEEN_WIDTH + `L2DRAM_ADDR_WIDTH + `L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH),
for (integer i = 0; i < `NUM_CLUSTERS; i++) begin .PASSTHRU (`NUM_CLUSTERS < 4)
for (integer j = 0; j < `NUM_CLUSTERS; j++) begin ) dram_req_buffer (
if (i != j) begin .clk (clk),
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]); .reset (reset),
end .valid_in (per_cluster_dram_req_valid[i]),
end .data_in ({per_cluster_dram_req_rw[i], per_cluster_dram_req_byteen[i], per_cluster_dram_req_addr[i], per_cluster_dram_req_data[i], per_cluster_dram_req_tag[i]}),
end .ready_in (per_cluster_dram_req_ready[i]),
.valid_out (per_cluster_dram_req_valid_qual[i]),
.data_out ({per_cluster_dram_req_rw_qual[i], per_cluster_dram_req_byteen_qual[i], per_cluster_dram_req_addr_qual[i], per_cluster_dram_req_data_qual[i], per_cluster_dram_req_tag_qual[i]}),
.ready_out (per_cluster_dram_req_ready_qual[i])
);
end end
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
wire cluster_dram_req_ready;
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
assign per_cluster_dram_req_ready[i] = cluster_dram_req_ready;
end
VX_cache #( VX_cache #(
.CACHE_ID (`L3CACHE_ID), .CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE), .CACHE_SIZE (`L3CACHE_SIZE),
@@ -358,19 +352,19 @@ module Vortex (
.reset (reset), .reset (reset),
// Core request // Core request
.core_req_valid (per_cluster_dram_req_valid), .core_req_valid (per_cluster_dram_req_valid_qual),
.core_req_rw (per_cluster_dram_req_rw), .core_req_rw (per_cluster_dram_req_rw_qual),
.core_req_byteen (per_cluster_dram_req_byteen), .core_req_byteen (per_cluster_dram_req_byteen_qual),
.core_req_addr (per_cluster_dram_req_addr), .core_req_addr (per_cluster_dram_req_addr_qual),
.core_req_data (per_cluster_dram_req_data), .core_req_data (per_cluster_dram_req_data_qual),
.core_req_tag (per_cluster_dram_req_tag), .core_req_tag (per_cluster_dram_req_tag_qual),
.core_req_ready (cluster_dram_req_ready), .core_req_ready (per_cluster_dram_req_ready_qual),
// Core response // Core response
.core_rsp_valid (cluster_dram_rsp_valid), .core_rsp_valid (per_cluster_dram_rsp_valid),
.core_rsp_data (cluster_dram_rsp_data), .core_rsp_data (per_cluster_dram_rsp_data),
.core_rsp_tag (cluster_dram_rsp_tag), .core_rsp_tag (per_cluster_dram_rsp_tag),
.core_rsp_ready (cluster_dram_rsp_ready), .core_rsp_ready (per_cluster_dram_rsp_ready),
// DRAM request // DRAM request
.dram_req_valid (dram_req_valid), .dram_req_valid (dram_req_valid),

View File

@@ -264,7 +264,9 @@ module VX_bank #(
.full (creq_full) .full (creq_full)
); );
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;
wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n;
reg mshr_going_full;
wire mshr_pop; wire mshr_pop;
wire mshr_valid_st0; wire mshr_valid_st0;
wire[`REQS_BITS-1:0] mshr_tid_st0; wire[`REQS_BITS-1:0] mshr_tid_st0;
@@ -346,14 +348,12 @@ module VX_bank #(
wire dreq_push_stall; wire dreq_push_stall;
wire srsq_push_stall; wire srsq_push_stall;
wire pipeline_stall; wire pipeline_stall;
wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3);
wire creq_commit = valid_st1 && core_req_hit_st1 && !pipeline_stall; wire creq_commit = valid_st1 && core_req_hit_st1 && !pipeline_stall;
wire mshr_going_full = (mshr_pending_size == MSHR_SIZE);
// determine which queue to pop next in priority order // determine which queue to pop next in priority order
wire mshr_pop_unqual = mshr_valid_st0; wire mshr_pop_unqual = mshr_valid_st0;
wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty; wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty;
@@ -367,13 +367,16 @@ module VX_bank #(
assign sreq_pop = sreq_pop_unqual && !pipeline_stall; assign sreq_pop = sreq_pop_unqual && !pipeline_stall;
// MSHR pending size // MSHR pending size
assign mshr_pending_size_n = mshr_pending_size +
((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0));
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
mshr_pending_size <= 0; mshr_pending_size <= 0;
mshr_going_full <= 0;
end else begin end else begin
mshr_pending_size <= mshr_pending_size + mshr_pending_size <= mshr_pending_size_n;
((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0)); mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE);
end end
end end
assign is_mshr_st0 = mshr_pop_unqual; assign is_mshr_st0 = mshr_pop_unqual;
@@ -736,7 +739,7 @@ end
.enqueue_byteen_st3 (req_byteen_st3), .enqueue_byteen_st3 (req_byteen_st3),
.enqueue_is_snp_st3 (is_snp_st3), .enqueue_is_snp_st3 (is_snp_st3),
.enqueue_snp_inv_st3(snp_inv_st3), .enqueue_snp_inv_st3(snp_inv_st3),
.enqueue_mshr_st3 (is_mshr_st3), .enqueue_is_mshr_st3(is_mshr_st3),
.enqueue_ready_st3 (mshr_init_ready_state_st3), .enqueue_ready_st3 (mshr_init_ready_state_st3),
.enqueue_full (mshr_full), .enqueue_full (mshr_full),

View File

@@ -39,11 +39,11 @@ module VX_cache #(
// Enable cache flush // Enable cache flush
parameter FLUSH_ENABLE = 1, parameter FLUSH_ENABLE = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = $clog2(MSHR_SIZE),
// core request tag size // core request tag size
parameter CORE_TAG_WIDTH = CORE_TAG_ID_BITS, parameter CORE_TAG_WIDTH = $clog2(MSHR_SIZE),
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size // dram request tag size
parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)), parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)),
@@ -63,13 +63,13 @@ module VX_cache #(
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready, output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
// Core response // Core response
output wire [NUM_REQS-1:0] core_rsp_valid, output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready, input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
// DRAM request // DRAM request
output wire dram_req_valid, output wire dram_req_valid,
@@ -139,9 +139,10 @@ module VX_cache #(
VX_cache_core_req_bank_sel #( VX_cache_core_req_bank_sel #(
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS) .NUM_REQS (NUM_REQS),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_req_bank_sel ( ) cache_core_req_bank_sel (
.core_req_valid (core_req_valid), .core_req_valid (core_req_valid),
.core_req_addr (core_req_addr), .core_req_addr (core_req_addr),
@@ -197,7 +198,7 @@ module VX_cache #(
wire curr_bank_miss; wire curr_bank_miss;
// Core Req // Core Req
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQS{core_req_ready}}); assign curr_bank_core_req_valid = per_bank_valid[i];
assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_rw = core_req_rw; assign curr_bank_core_req_rw = core_req_rw;
assign curr_bank_core_req_byteen = core_req_byteen; assign curr_bank_core_req_byteen = core_req_byteen;
@@ -355,18 +356,18 @@ module VX_cache #(
end end
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS(NUM_BANKS), .NUM_REQS (NUM_BANKS),
.DATAW(`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), .DATAW (`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
.BUFFERED(NUM_BANKS >= 4) .OUT_BUFFER (NUM_BANKS >= 4)
) dram_req_arb ( ) dram_req_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (per_bank_dram_req_valid), .valid_in (per_bank_dram_req_valid),
.data_in (data_in), .data_in (data_in),
.ready_in (per_bank_dram_req_ready), .ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid), .valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}), .data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready) .ready_out (dram_req_ready)
); );
end else begin end else begin
`UNUSED_VAR (per_bank_dram_req_valid) `UNUSED_VAR (per_bank_dram_req_valid)
@@ -385,18 +386,18 @@ module VX_cache #(
if (FLUSH_ENABLE) begin if (FLUSH_ENABLE) begin
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS(NUM_BANKS), .NUM_REQS (NUM_BANKS),
.DATAW(SNP_TAG_WIDTH), .DATAW (SNP_TAG_WIDTH),
.BUFFERED(NUM_BANKS >= 4) .OUT_BUFFER (NUM_BANKS >= 4)
) snp_rsp_arb ( ) snp_rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (per_bank_snp_rsp_valid), .valid_in (per_bank_snp_rsp_valid),
.data_in (per_bank_snp_rsp_tag), .data_in (per_bank_snp_rsp_tag),
.ready_in (per_bank_snp_rsp_ready), .ready_in (per_bank_snp_rsp_ready),
.valid_out (snp_rsp_valid), .valid_out (snp_rsp_valid),
.data_out (snp_rsp_tag), .data_out (snp_rsp_tag),
.ready_out (snp_rsp_ready) .ready_out (snp_rsp_ready)
); );
end else begin end else begin
`UNUSED_VAR (per_bank_snp_rsp_valid) `UNUSED_VAR (per_bank_snp_rsp_valid)

View File

@@ -15,7 +15,7 @@
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
// data metadata word_sel is_snp snp_inv // data metadata word_sel is_snp snp_inv
`define MSHR_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1) `define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
`define BANK_BITS `LOG2UP(NUM_BANKS) `define BANK_BITS `LOG2UP(NUM_BANKS)

View File

@@ -8,53 +8,72 @@ module VX_cache_core_req_bank_sel #(
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of Word requests per cycle // Number of Word requests per cycle
parameter NUM_REQS = 1 parameter NUM_REQS = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 1
) ( ) (
input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
output wire core_req_ready, output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid, output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid,
input wire [NUM_BANKS-1:0] per_bank_ready input wire [NUM_BANKS-1:0] per_bank_ready
); );
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_ignore;
reg [NUM_BANKS-1:0] per_bank_ready_other;
always @(*) begin
per_bank_valid_r = 0;
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
end
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r;
always @(*) begin
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQS; i++) begin for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end end
end end
for (genvar i = 0; i < NUM_BANKS; i++) begin if (CORE_TAG_ID_BITS != 0) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i]; reg [NUM_BANKS-1:0] per_bank_ready_other, per_bank_ready_ignore;
end
end always @(*) begin
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
assign core_req_ready = & (per_bank_ready | per_bank_ready_ignore); for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
end
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] && per_bank_ready_other[i];
end
end
assign core_req_ready[0] = & (per_bank_ready | per_bank_ready_ignore);
end else begin
assign per_bank_valid = per_bank_valid_r;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign core_req_ready[i] = per_bank_ready[core_req_addr[i][`BANK_SELECT_ADDR_RNG]];
end
end
end else begin end else begin
`UNUSED_VAR (core_req_addr) `UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid; assign per_bank_valid = core_req_valid;
assign core_req_ready = per_bank_ready; assign core_req_ready[0] = per_bank_ready;
end end

View File

@@ -26,18 +26,20 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_REQS-1:0] core_rsp_valid, output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire core_rsp_ready input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready
); );
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual; reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select; reg [NUM_BANKS-1:0] core_rsp_bank_select;
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [CORE_TAG_ID_BITS-1:0] sel_tag_id; reg [CORE_TAG_ID_BITS-1:0] sel_tag_id;
wire stall = ~core_rsp_ready && (| core_rsp_valid);
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
@@ -60,13 +62,32 @@ module VX_cache_core_rsp_merge #(
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == sel_tag_id)) begin && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == sel_tag_id)) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1; core_rsp_bank_select[i] = ~stall;
end end
end end
end
VX_generic_register #(
.N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + CORE_TAG_WIDTH),
.R(NUM_REQS)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end end
end else begin end else begin
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0] stall;
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x; core_rsp_tag_unqual = 'x;
@@ -79,29 +100,32 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i]; core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1; core_rsp_bank_select[i] = ~stall[per_bank_core_rsp_tid[i]];
end end
end end
end end
end for (genvar i = 0; i < NUM_REQS; i++) begin
wire stall = ~core_rsp_ready && (| core_rsp_valid); assign stall[i] = ~core_rsp_ready[i] && core_rsp_valid[i];
VX_generic_register #( VX_generic_register #(
.N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), .N(1 + `WORD_WIDTH + CORE_TAG_WIDTH),
.R(NUM_REQS) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall[i]),
.flush (1'b0), .flush (1'b0),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), .data_in ({core_rsp_valid_unqual[i], core_rsp_data_unqual[i], core_rsp_tag_unqual[i]}),
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) .data_out ({core_rsp_valid[i], core_rsp_data[i], core_rsp_tag[i]})
); );
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i] && ~stall;
end end
end else begin end else begin
@@ -116,14 +140,19 @@ module VX_cache_core_rsp_merge #(
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[0]; core_rsp_tag_unqual = per_bank_core_rsp_tag[0];
core_rsp_data_unqual = 'x; core_rsp_data_unqual = 'x;
core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid; core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid;
core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0]; core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0];
end end
assign per_bank_core_rsp_ready[0] = core_rsp_ready;
end else begin end else begin
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x; core_rsp_tag_unqual = 'x;
@@ -131,14 +160,16 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid; core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid;
core_rsp_tag_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_tag[0]; core_rsp_tag_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_tag[0];
core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0]; core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0];
end end
assign per_bank_core_rsp_ready[0] = core_rsp_ready[per_bank_core_rsp_tid[0]];
end end
assign core_rsp_valid = core_rsp_valid_unqual; assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tag = core_rsp_tag_unqual; assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual; assign core_rsp_data = core_rsp_data_unqual;
assign per_bank_core_rsp_ready[0] = core_rsp_ready;
end else begin end else begin
`UNUSED_VAR(per_bank_core_rsp_tid) `UNUSED_VAR(per_bank_core_rsp_tid)

View File

@@ -48,7 +48,7 @@ module VX_miss_resrv #(
input wire[WORD_SIZE-1:0] enqueue_byteen_st3, input wire[WORD_SIZE-1:0] enqueue_byteen_st3,
input wire enqueue_is_snp_st3, input wire enqueue_is_snp_st3,
input wire enqueue_snp_inv_st3, input wire enqueue_snp_inv_st3,
input wire enqueue_mshr_st3, input wire enqueue_is_mshr_st3,
input wire enqueue_ready_st3, input wire enqueue_ready_st3,
output wire enqueue_full, output wire enqueue_full,
@@ -71,7 +71,7 @@ module VX_miss_resrv #(
output wire dequeue_snp_inv_st0, output wire dequeue_snp_inv_st0,
input wire dequeue_st3 input wire dequeue_st3
); );
wire [`MSHR_METADATA_WIDTH-1:0] metadata_table; wire [`MSHR_DATA_WIDTH-1:0] data_table;
reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
@@ -91,7 +91,7 @@ module VX_miss_resrv #(
assign pending_hazard_st0 = (| valid_address_match); assign pending_hazard_st0 = (| valid_address_match);
wire dequeue_ready = valid_table[schedule_ptr] && ready_table[schedule_ptr]; wire dequeue_ready = ready_table[schedule_ptr];
assign dequeue_valid_st0 = dequeue_ready; assign dequeue_valid_st0 = dequeue_ready;
assign dequeue_addr_st0 = addr_table[schedule_ptr]; assign dequeue_addr_st0 = addr_table[schedule_ptr];
@@ -102,9 +102,9 @@ module VX_miss_resrv #(
dequeue_byteen_st0, dequeue_byteen_st0,
dequeue_wsel_st0, dequeue_wsel_st0,
dequeue_is_snp_st0, dequeue_is_snp_st0,
dequeue_snp_inv_st0} = metadata_table; dequeue_snp_inv_st0} = data_table;
wire mshr_push = enqueue_st3 && !enqueue_mshr_st3; wire mshr_push = enqueue_st3 && !enqueue_is_mshr_st3;
wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1);
@@ -124,7 +124,7 @@ module VX_miss_resrv #(
if (enqueue_st3) begin if (enqueue_st3) begin
assert(!enqueue_full); assert(!enqueue_full);
if (enqueue_mshr_st3) begin if (enqueue_is_mshr_st3) begin
// returning missed msrq entry, restore schedule // returning missed msrq entry, restore schedule
valid_table[restore_ptr] <= 1; valid_table[restore_ptr] <= 1;
ready_table[restore_ptr] <= enqueue_ready_st3; ready_table[restore_ptr] <= enqueue_ready_st3;
@@ -146,19 +146,20 @@ module VX_miss_resrv #(
if (schedule_st0) begin if (schedule_st0) begin
assert(dequeue_valid_st0); assert(dequeue_valid_st0);
valid_table[schedule_ptr] <= 0; valid_table[schedule_ptr] <= 0;
ready_table[schedule_ptr] <= 0;
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
end end
end end
end end
always @(posedge clk) begin always @(posedge clk) begin
if (enqueue_st3 && !enqueue_mshr_st3) begin if (enqueue_st3 && !enqueue_is_mshr_st3) begin
addr_table[tail_ptr] <= enqueue_addr_st3; addr_table[tail_ptr] <= enqueue_addr_st3;
end end
end end
VX_dp_ram #( VX_dp_ram #(
.DATAW(`MSHR_METADATA_WIDTH), .DATAW(`MSHR_DATA_WIDTH),
.SIZE(MSHR_SIZE), .SIZE(MSHR_SIZE),
.BYTEENW(1), .BYTEENW(1),
.BUFFERED(0), .BUFFERED(0),
@@ -171,7 +172,7 @@ module VX_miss_resrv #(
.byteen(1'b1), .byteen(1'b1),
.rden(1'b1), .rden(1'b1),
.din({enqueue_data_st3, enqueue_tid_st3, enqueue_tag_st3, enqueue_rw_st3, enqueue_byteen_st3, enqueue_wsel_st3, enqueue_is_snp_st3, enqueue_snp_inv_st3}), .din({enqueue_data_st3, enqueue_tid_st3, enqueue_tag_st3, enqueue_rw_st3, enqueue_byteen_st3, enqueue_wsel_st3, enqueue_is_snp_st3, enqueue_snp_inv_st3}),
.dout(metadata_table) .dout(data_table)
); );
`ifdef DBG_PRINT_CACHE_MSHR `ifdef DBG_PRINT_CACHE_MSHR
@@ -180,7 +181,7 @@ module VX_miss_resrv #(
if (schedule_st0) if (schedule_st0)
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
if (enqueue_st3) begin if (enqueue_st3) begin
if (enqueue_mshr_st3) if (enqueue_is_mshr_st3)
$display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3); $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3);
else else
$display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3); $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3);

View File

@@ -44,26 +44,6 @@ module VX_snp_forwarder #(
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
// Inputs buffering
wire [NUM_REQS-1:0] snp_fwdin_valid_qual;
wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0] snp_fwdin_ready_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (TAG_OUT_WIDTH),
.PASSTHRU (NUM_REQS < 4)
) snp_fwdin_buffer (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid[i]),
.data_in (snp_fwdin_tag[i]),
.ready_in (snp_fwdin_ready[i]),
.valid_out (snp_fwdin_valid_qual[i]),
.data_out (snp_fwdin_tag_qual[i]),
.ready_out (snp_fwdin_ready_qual[i])
);
end
reg [REQ_QUAL_BITS:0] pending_cntrs [SREQ_SIZE-1:0]; reg [REQ_QUAL_BITS:0] pending_cntrs [SREQ_SIZE-1:0];
wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr; wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr;
@@ -181,15 +161,16 @@ module VX_snp_forwarder #(
assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold; assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS (NUM_REQS),
.DATAW(TAG_OUT_WIDTH), .DATAW (TAG_OUT_WIDTH),
.BUFFERED(NUM_REQS >= 4) .IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) snp_fwdin_arb ( ) snp_fwdin_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (snp_fwdin_valid_qual), .valid_in (snp_fwdin_valid),
.data_in (snp_fwdin_tag_qual), .data_in (snp_fwdin_tag),
.ready_in (snp_fwdin_ready_qual), .ready_in (snp_fwdin_ready),
.valid_out (fwdin_valid), .valid_out (fwdin_valid),
.data_out (fwdin_tag), .data_out (fwdin_tag),
.ready_out (fwdin_ready) .ready_out (fwdin_ready)

View File

@@ -52,9 +52,7 @@ module VX_cam_buffer #(
write_addr_r <= ADDRW'(1'b0); write_addr_r <= ADDRW'(1'b0);
end else begin end else begin
if (release_slot) begin if (release_slot) begin
assert(0 == free_slots[release_addr]) else begin assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
$display("%t: releasing invalid slot at port %d", $time, release_addr);
end
end end
free_slots <= free_slots_n; free_slots <= free_slots_n;
write_addr_r <= free_index; write_addr_r <= free_index;

View File

@@ -1,10 +1,11 @@
`include "VX_platform.vh" `include "VX_platform.vh"
module VX_stream_arbiter #( module VX_stream_arbiter #(
parameter NUM_REQS = 1, parameter NUM_REQS = 1,
parameter DATAW = 1, parameter DATAW = 1,
parameter TYPE = "R", parameter TYPE = "R",
parameter BUFFERED = 0 parameter IN_BUFFER = 0,
parameter OUT_BUFFER = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -17,18 +18,30 @@ module VX_stream_arbiter #(
output wire [DATAW-1:0] data_out, output wire [DATAW-1:0] data_out,
input wire ready_out input wire ready_out
); );
localparam LOG_NUM_REQS = $clog2(NUM_REQS); localparam LOG_NUM_REQS = $clog2(NUM_REQS);
if (NUM_REQS == 1) begin if (NUM_REQS > 1) begin
`UNUSED_VAR (clk) wire [NUM_REQS-1:0] valid_in_qual;
`UNUSED_VAR (reset) wire [NUM_REQS-1:0][DATAW-1:0] data_in_qual;
wire [NUM_REQS-1:0] ready_in_qual;
assign valid_out = valid_in;
assign data_out = data_in;
assign ready_in = ready_out;
end else begin for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (!IN_BUFFER)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in[i]),
.data_in (data_in[i]),
.ready_in (ready_in[i]),
.valid_out (valid_in_qual[i]),
.data_out (data_in_qual[i]),
.ready_out (ready_in_qual[i])
);
end
wire sel_enable; wire sel_enable;
wire sel_valid; wire sel_valid;
@@ -41,13 +54,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1) .LOCK_ENABLE(1)
) sel_arb ( ) sel_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in_qual),
.enable (sel_enable), .enable (sel_enable),
.grant_valid (sel_valid), .grant_valid (sel_valid),
.grant_index (sel_idx), .grant_index (sel_idx),
.grant_onehot(sel_1hot) .grant_onehot (sel_1hot)
); );
end else if (TYPE == "R") begin end else if (TYPE == "R") begin
@@ -56,13 +69,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1) .LOCK_ENABLE(1)
) sel_arb ( ) sel_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in_qual),
.enable (sel_enable), .enable (sel_enable),
.grant_valid (sel_valid), .grant_valid (sel_valid),
.grant_index (sel_idx), .grant_index (sel_idx),
.grant_onehot(sel_1hot) .grant_onehot (sel_1hot)
); );
end else if (TYPE == "F") begin end else if (TYPE == "F") begin
@@ -71,13 +84,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1) .LOCK_ENABLE(1)
) sel_arb ( ) sel_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in_qual),
.enable (sel_enable), .enable (sel_enable),
.grant_valid (sel_valid), .grant_valid (sel_valid),
.grant_index (sel_idx), .grant_index (sel_idx),
.grant_onehot(sel_1hot) .grant_onehot (sel_1hot)
); );
end else if (TYPE == "M") begin end else if (TYPE == "M") begin
@@ -86,18 +99,18 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS), .NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1) .LOCK_ENABLE(1)
) sel_arb ( ) sel_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (valid_in), .requests (valid_in_qual),
.enable (sel_enable), .enable (sel_enable),
.grant_valid (sel_valid), .grant_valid (sel_valid),
.grant_index (sel_idx), .grant_index (sel_idx),
.grant_onehot(sel_1hot) .grant_onehot (sel_1hot)
); );
end end
if (BUFFERED) begin if (OUT_BUFFER) begin
wire stall = ~ready_out && valid_out; wire stall = ~ready_out && valid_out;
assign sel_enable = ~stall; assign sel_enable = ~stall;
@@ -110,25 +123,35 @@ module VX_stream_arbiter #(
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.data_in ({sel_valid, data_in[sel_idx]}), .data_in ({sel_valid, data_in_qual[sel_idx]}),
.data_out ({valid_out, data_out}) .data_out ({valid_out, data_out})
); );
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = sel_1hot[i] && ~stall; assign ready_in_qual[i] = sel_1hot[i] && ~stall;
end end
end else begin end else begin
assign sel_enable = ready_out; assign sel_enable = ready_out;
assign valid_out = sel_valid;
assign data_out = data_in_qual[sel_idx];
assign valid_out = sel_valid;
assign data_out = data_in[sel_idx];
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = sel_1hot[i] && ready_out; assign ready_in_qual[i] = sel_1hot[i] && ready_out;
end end
end
end
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign valid_out = valid_in;
assign data_out = data_in;
assign ready_in = ready_out;
end end
endmodule endmodule