Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

79
hw/rtl/mem/VX_gbar_arb.sv Normal file
View File

@@ -0,0 +1,79 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global-barrier request arbiter.
//
// Merges the barrier requests of NUM_REQS upstream ports onto a single
// downstream port through a VX_stream_arb, and broadcasts the downstream
// response to every upstream port one clock later.
//
// Parameters:
//   NUM_REQS - number of upstream (slave-side) request ports
//   OUT_REG  - passed through to VX_stream_arb.OUT_REG
//   ARBITER  - passed through to VX_stream_arb.ARBITER
module VX_gbar_arb #(
    parameter NUM_REQS        = 1,
    parameter OUT_REG         = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,

    VX_gbar_bus_if.slave    bus_in_if [NUM_REQS],
    VX_gbar_bus_if.master   bus_out_if
);
    // width of one flattened request: {req_id, req_size_m1, req_core_id}
    localparam REQ_DATAW = `NB_WIDTH + `NC_WIDTH + `NC_WIDTH;

    // ---------------------------------------------------------------
    // request path: flatten every upstream port, then arbitrate
    // ---------------------------------------------------------------

    wire [NUM_REQS-1:0]                in_req_valid;
    wire [NUM_REQS-1:0][REQ_DATAW-1:0] in_req_data;
    wire [NUM_REQS-1:0]                in_req_ready;

    for (genvar r = 0; r < NUM_REQS; ++r) begin
        assign in_req_valid[r] = bus_in_if[r].req_valid;
        assign in_req_data[r]  = {
            bus_in_if[r].req_id,
            bus_in_if[r].req_size_m1,
            bus_in_if[r].req_core_id
        };
        assign bus_in_if[r].req_ready = in_req_ready[r];
    end

    VX_stream_arb #(
        .NUM_INPUTS  (NUM_REQS),
        .NUM_OUTPUTS (1),
        .DATAW       (REQ_DATAW),
        .ARBITER     (ARBITER),
        .OUT_REG     (OUT_REG)
    ) req_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (in_req_valid),
        .data_in   (in_req_data),
        .ready_in  (in_req_ready),
        .valid_out (bus_out_if.req_valid),
        .data_out  ({
            bus_out_if.req_id,
            bus_out_if.req_size_m1,
            bus_out_if.req_core_id
        }),
        .ready_out (bus_out_if.req_ready),
        `UNUSED_PIN (sel_out)
    );

    // ---------------------------------------------------------------
    // response path: register the downstream response and fan it out
    // ---------------------------------------------------------------

    reg                 bcast_valid;
    reg [`NB_WIDTH-1:0] bcast_id;

    // only the valid flag is cleared by reset
    always @(posedge clk) begin
        if (reset) begin
            bcast_valid <= 0;
        end else begin
            bcast_valid <= bus_out_if.rsp_valid;
        end
    end

    // the barrier id is captured every clock, reset or not
    always @(posedge clk) begin
        bcast_id <= bus_out_if.rsp_id;
    end

    // every requester sees the same response
    for (genvar r = 0; r < NUM_REQS; ++r) begin
        assign bus_in_if[r].rsp_valid = bcast_valid;
        assign bus_in_if[r].rsp_id    = bcast_id;
    end
endmodule

View File

@@ -0,0 +1,49 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global-barrier bus.
//
// Request channel : valid/ready handshake carrying a barrier id, the
//                   barrier size minus one, and the requesting core id.
// Response channel: valid flag plus barrier id; it has no ready signal,
//                   so the receiver cannot back-pressure a response.
interface VX_gbar_bus_if ();

    // request channel (driven by the master, except req_ready)
    wire                    req_valid;
    wire [`NB_WIDTH-1:0]    req_id;
    wire [`NC_WIDTH-1:0]    req_size_m1;
    wire [`NC_WIDTH-1:0]    req_core_id;
    wire                    req_ready;

    // response channel (driven by the slave)
    wire                    rsp_valid;
    wire [`NB_WIDTH-1:0]    rsp_id;

    // requester side
    modport master (
        output req_valid, req_id, req_size_m1, req_core_id,
        input  req_ready,
        input  rsp_valid, rsp_id
    );

    // responder side (exact mirror of master)
    modport slave (
        input  req_valid, req_id, req_size_m1, req_core_id,
        output req_ready,
        output rsp_valid, rsp_id
    );

endinterface

View File

@@ -0,0 +1,72 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global barrier unit.
//
// Keeps one arrival bitmask (one bit per core) for every barrier id.
// A request whose barrier already has req_size_m1 cores recorded is the
// last arrival: the mask is cleared and a response carrying the barrier
// id is raised on the following clock. Any other request just records the
// requesting core in the mask. Requests are always accepted.
//
// Parameters:
//   INSTANCE_ID - name printed in DBG_TRACE_GBAR trace messages
module VX_gbar_unit #(
    parameter `STRING INSTANCE_ID = ""
) (
    input wire clk,
    input wire reset,

    VX_gbar_bus_if.slave gbar_bus_if
);
    `UNUSED_SPARAM (INSTANCE_ID)

    // req_id is `NB_WIDTH bits wide, so it can encode 2**`NB_WIDTH distinct
    // barrier ids. The table must have one row per encodable id: with fewer
    // rows an id past the end reads x and its write is dropped, so that
    // barrier could never complete.
    localparam NUM_BAR_IDS = (1 << `NB_WIDTH);

    reg [NUM_BAR_IDS-1:0][`NUM_CORES-1:0] barrier_masks;

    // number of cores already recorded for the barrier being requested
    wire [`CLOG2(`NUM_CORES+1)-1:0] active_barrier_count;
    wire [`NUM_CORES-1:0] curr_barrier_mask = barrier_masks[gbar_bus_if.req_id];
    `POP_COUNT(active_barrier_count, curr_barrier_mask);
    // only the low `NC_WIDTH bits are compared below
    // (presumably this macro silences the lint warning for the rest - confirm)
    `UNUSED_VAR (active_barrier_count)

    reg rsp_valid;
    reg [`NB_WIDTH-1:0] rsp_bar_id;

    always @(posedge clk) begin
        if (reset) begin
            barrier_masks <= '0;
            rsp_valid     <= 0;
        end else begin
            // a response lasts one clock unless a new completion re-arms it below
            if (rsp_valid) begin
                rsp_valid <= 0;
            end
            if (gbar_bus_if.req_valid) begin
                if (active_barrier_count[`NC_WIDTH-1:0] == gbar_bus_if.req_size_m1) begin
                    // last arrival: release the barrier and start over
                    barrier_masks[gbar_bus_if.req_id] <= '0;
                    rsp_bar_id <= gbar_bus_if.req_id;
                    rsp_valid  <= 1;
                end else begin
                    // record this core and keep waiting for the others
                    barrier_masks[gbar_bus_if.req_id][gbar_bus_if.req_core_id] <= 1;
                end
            end
        end
    end

    assign gbar_bus_if.rsp_valid = rsp_valid;
    assign gbar_bus_if.rsp_id    = rsp_bar_id;
    assign gbar_bus_if.req_ready = 1; // global barrier unit is always ready (no dependencies)

`ifdef DBG_TRACE_GBAR
    always @(posedge clk) begin
        if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
            `TRACE(1, ("%d: %s-acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
                $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id));
        end
        if (gbar_bus_if.rsp_valid) begin
            `TRACE(1, ("%d: %s-release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id));
        end
    end
`endif
endmodule

177
hw/rtl/mem/VX_mem_arb.sv Normal file
View File

@@ -0,0 +1,177 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Memory bus arbiter.
//
// Request path : NUM_INPUTS upstream buses are arbitrated onto NUM_OUTPUTS
//                downstream buses by a VX_stream_arb. The arbiter's select
//                output is inserted into the outgoing request tag at bit
//                position TAG_SEL_IDX.
// Response path: when NUM_INPUTS > NUM_OUTPUTS the select bits are taken
//                back out of the response tag and steer a VX_stream_switch;
//                otherwise responses pass through a VX_stream_arb with the
//                tag left untouched.
//
// Parameters:
//   NUM_INPUTS / NUM_OUTPUTS  - bus counts; NUM_INPUTS >= NUM_OUTPUTS is asserted
//   DATA_SIZE                 - data width in bytes (also the byte-enable width)
//   MEM_ADDR_WIDTH            - only used to derive the default ADDR_WIDTH
//   ADDR_WIDTH                - width of the address field
//   TAG_WIDTH                 - tag width on the input side
//   TAG_SEL_IDX               - bit position of the select field in the output tag
//   OUT_REG_REQ / OUT_REG_RSP - OUT_REG setting of the request / response stage
//   ARBITER                   - ARBITER setting of the VX_stream_arb instances
module VX_mem_arb #(
    parameter NUM_INPUTS      = 1,
    parameter NUM_OUTPUTS     = 1,
    parameter DATA_SIZE       = 1,
    parameter MEM_ADDR_WIDTH  = `MEM_ADDR_WIDTH,
    parameter ADDR_WIDTH      = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)),
    parameter TAG_WIDTH       = 1,
    parameter TAG_SEL_IDX     = 0,
    parameter OUT_REG_REQ     = 0,
    parameter OUT_REG_RSP     = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,

    VX_mem_bus_if.slave     bus_in_if [NUM_INPUTS],
    VX_mem_bus_if.master    bus_out_if [NUM_OUTPUTS]
);
    localparam DATA_WIDTH   = (8 * DATA_SIZE);
    localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
    // flattened request : {tag, addr, rw, byteen, data}
    localparam REQ_DATAW    = TAG_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    // flattened response: {tag, data}
    localparam RSP_DATAW    = TAG_WIDTH + DATA_WIDTH;

    `STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter"))

    // ---------------------------------------------------------------
    // request path
    // ---------------------------------------------------------------

    // input side of the request arbiter
    wire [NUM_INPUTS-1:0]                 in_req_valid;
    wire [NUM_INPUTS-1:0][REQ_DATAW-1:0]  in_req_data;
    wire [NUM_INPUTS-1:0]                 in_req_ready;

    // output side of the request arbiter
    wire [NUM_OUTPUTS-1:0]                out_req_valid;
    wire [NUM_OUTPUTS-1:0][REQ_DATAW-1:0] out_req_data;
    wire [NUM_OUTPUTS-1:0][`UP(LOG_NUM_REQS)-1:0] out_req_sel;
    wire [NUM_OUTPUTS-1:0]                out_req_ready;

    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        assign in_req_valid[i] = bus_in_if[i].req_valid;
        assign in_req_data[i]  = {
            bus_in_if[i].req_data.tag,
            bus_in_if[i].req_data.addr,
            bus_in_if[i].req_data.rw,
            bus_in_if[i].req_data.byteen,
            bus_in_if[i].req_data.data
        };
        assign bus_in_if[i].req_ready = in_req_ready[i];
    end

    VX_stream_arb #(
        .NUM_INPUTS  (NUM_INPUTS),
        .NUM_OUTPUTS (NUM_OUTPUTS),
        .DATAW       (REQ_DATAW),
        .ARBITER     (ARBITER),
        .OUT_REG     (OUT_REG_REQ)
    ) req_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (in_req_valid),
        .data_in   (in_req_data),
        .ready_in  (in_req_ready),
        .valid_out (out_req_valid),
        .data_out  (out_req_data),
        .sel_out   (out_req_sel),
        .ready_out (out_req_ready)
    );

    for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
        // tag exactly as the winning requester sent it
        wire [TAG_WIDTH-1:0] granted_tag;

        assign bus_out_if[i].req_valid = out_req_valid[i];
        assign {
            granted_tag,
            bus_out_if[i].req_data.addr,
            bus_out_if[i].req_data.rw,
            bus_out_if[i].req_data.byteen,
            bus_out_if[i].req_data.data
        } = out_req_data[i];
        assign out_req_ready[i] = bus_out_if[i].req_ready;

        // widen the tag with the arbiter's select value
        VX_bits_insert #(
            .N   (TAG_WIDTH),
            .S   (LOG_NUM_REQS),
            .POS (TAG_SEL_IDX)
        ) bits_insert (
            .data_in  (granted_tag),
            .sel_in   (out_req_sel[i]),
            .data_out (bus_out_if[i].req_data.tag)
        );
    end

    ///////////////////////////////////////////////////////////////////////////

    // ---------------------------------------------------------------
    // response path
    // ---------------------------------------------------------------

    // towards bus_in_if
    wire [NUM_INPUTS-1:0]                 in_rsp_valid;
    wire [NUM_INPUTS-1:0][RSP_DATAW-1:0]  in_rsp_data;
    wire [NUM_INPUTS-1:0]                 in_rsp_ready;

    // from bus_out_if
    wire [NUM_OUTPUTS-1:0]                out_rsp_valid;
    wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] out_rsp_data;
    wire [NUM_OUTPUTS-1:0]                out_rsp_ready;

    if (NUM_INPUTS > NUM_OUTPUTS) begin

        // per-output destination, recovered from the response tag
        wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] out_rsp_sel;

        for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
            // response tag with the select field taken out
            wire [TAG_WIDTH-1:0] stripped_tag;

            VX_bits_remove #(
                .N   (TAG_WIDTH + LOG_NUM_REQS),
                .S   (LOG_NUM_REQS),
                .POS (TAG_SEL_IDX)
            ) bits_remove (
                .data_in  (bus_out_if[i].rsp_data.tag),
                .data_out (stripped_tag)
            );

            assign out_rsp_valid[i] = bus_out_if[i].rsp_valid;
            assign out_rsp_data[i]  = {
                stripped_tag,
                bus_out_if[i].rsp_data.data
            };
            assign bus_out_if[i].rsp_ready = out_rsp_ready[i];

            // The enclosing NUM_INPUTS > NUM_OUTPUTS test already implies
            // NUM_INPUTS > 1 whenever this loop body exists, so the else arm
            // is only a defensive fallback.
            if (NUM_INPUTS > 1) begin
                assign out_rsp_sel[i] = bus_out_if[i].rsp_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS];
            end else begin
                assign out_rsp_sel[i] = '0;
            end
        end

        VX_stream_switch #(
            .NUM_INPUTS  (NUM_OUTPUTS),
            .NUM_OUTPUTS (NUM_INPUTS),
            .DATAW       (RSP_DATAW),
            .OUT_REG     (OUT_REG_RSP)
        ) rsp_switch (
            .clk       (clk),
            .reset     (reset),
            .sel_in    (out_rsp_sel),
            .valid_in  (out_rsp_valid),
            .data_in   (out_rsp_data),
            .ready_in  (out_rsp_ready),
            .valid_out (in_rsp_valid),
            .data_out  (in_rsp_data),
            .ready_out (in_rsp_ready)
        );

    end else begin

        // no select field was added on the way out: forward the tag as is
        for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
            assign out_rsp_valid[i] = bus_out_if[i].rsp_valid;
            assign out_rsp_data[i]  = {
                bus_out_if[i].rsp_data.tag,
                bus_out_if[i].rsp_data.data
            };
            assign bus_out_if[i].rsp_ready = out_rsp_ready[i];
        end

        // despite its instance name, this arbiter carries responses
        VX_stream_arb #(
            .NUM_INPUTS  (NUM_OUTPUTS),
            .NUM_OUTPUTS (NUM_INPUTS),
            .DATAW       (RSP_DATAW),
            .ARBITER     (ARBITER),
            .OUT_REG     (OUT_REG_RSP)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (out_rsp_valid),
            .data_in   (out_rsp_data),
            .ready_in  (out_rsp_ready),
            .valid_out (in_rsp_valid),
            .data_out  (in_rsp_data),
            .ready_out (in_rsp_ready),
            `UNUSED_PIN (sel_out)
        );

    end

    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        assign bus_in_if[i].rsp_valid = in_rsp_valid[i];
        assign {
            bus_in_if[i].rsp_data.tag,
            bus_in_if[i].rsp_data.data
        } = in_rsp_data[i];
        assign in_rsp_ready[i] = bus_in_if[i].rsp_ready;
    end
endmodule

View File

@@ -0,0 +1,64 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Memory bus with independent request and response channels, each using a
// valid/ready handshake.
//
// Parameters:
//   DATA_SIZE      - data width in bytes (one byte-enable bit per byte)
//   TAG_WIDTH      - width of the tag carried by requests and responses
//   MEM_ADDR_WIDTH - only used to derive the default ADDR_WIDTH
//   ADDR_WIDTH     - width of the request address field
interface VX_mem_bus_if #(
    parameter DATA_SIZE      = 1,
    parameter TAG_WIDTH      = 1,
    parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
    parameter ADDR_WIDTH     = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE)
) ();

    // request payload (packed, so the first member is the most significant)
    typedef struct packed {
        logic                   rw;
        logic [DATA_SIZE-1:0]   byteen;
        logic [ADDR_WIDTH-1:0]  addr;
        logic [DATA_SIZE*8-1:0] data;
        logic [TAG_WIDTH-1:0]   tag;
    } req_data_t;

    // response payload
    typedef struct packed {
        logic [DATA_SIZE*8-1:0] data;
        logic [TAG_WIDTH-1:0]   tag;
    } rsp_data_t;

    // request channel
    logic       req_valid;
    req_data_t  req_data;
    logic       req_ready;

    // response channel
    logic       rsp_valid;
    rsp_data_t  rsp_data;
    logic       rsp_ready;

    // initiator: issues requests, consumes responses
    modport master (
        output req_valid, req_data,
        input  req_ready,
        input  rsp_valid, rsp_data,
        output rsp_ready
    );

    // target: consumes requests, issues responses
    modport slave (
        input  req_valid, req_data,
        output req_ready,
        output rsp_valid, rsp_data,
        input  rsp_ready
    );

endinterface

View File

@@ -0,0 +1,118 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Bundle of memory-subsystem performance counters.
// Every counter is `PERF_CTR_BITS wide. The master modport drives all of
// them; the slave modport only reads them.
interface VX_mem_perf_if ();

    // instruction cache
    wire [`PERF_CTR_BITS-1:0] icache_reads, icache_read_misses;

    // data cache
    wire [`PERF_CTR_BITS-1:0] dcache_reads, dcache_writes;
    wire [`PERF_CTR_BITS-1:0] dcache_read_misses, dcache_write_misses;
    wire [`PERF_CTR_BITS-1:0] dcache_bank_stalls, dcache_mshr_stalls;

    // shared memory
    wire [`PERF_CTR_BITS-1:0] smem_reads, smem_writes, smem_bank_stalls;

    // L2 cache
    wire [`PERF_CTR_BITS-1:0] l2cache_reads, l2cache_writes;
    wire [`PERF_CTR_BITS-1:0] l2cache_read_misses, l2cache_write_misses;
    wire [`PERF_CTR_BITS-1:0] l2cache_bank_stalls, l2cache_mshr_stalls;

    // L3 cache
    wire [`PERF_CTR_BITS-1:0] l3cache_reads, l3cache_writes;
    wire [`PERF_CTR_BITS-1:0] l3cache_read_misses, l3cache_write_misses;
    wire [`PERF_CTR_BITS-1:0] l3cache_bank_stalls, l3cache_mshr_stalls;

    // main memory
    wire [`PERF_CTR_BITS-1:0] mem_reads, mem_writes, mem_latency;

    // producer of the counters
    modport master (
        output icache_reads, icache_read_misses,
               dcache_reads, dcache_writes,
               dcache_read_misses, dcache_write_misses,
               dcache_bank_stalls, dcache_mshr_stalls,
               smem_reads, smem_writes, smem_bank_stalls,
               l2cache_reads, l2cache_writes,
               l2cache_read_misses, l2cache_write_misses,
               l2cache_bank_stalls, l2cache_mshr_stalls,
               l3cache_reads, l3cache_writes,
               l3cache_read_misses, l3cache_write_misses,
               l3cache_bank_stalls, l3cache_mshr_stalls,
               mem_reads, mem_writes, mem_latency
    );

    // consumer of the counters
    modport slave (
        input  icache_reads, icache_read_misses,
               dcache_reads, dcache_writes,
               dcache_read_misses, dcache_write_misses,
               dcache_bank_stalls, dcache_mshr_stalls,
               smem_reads, smem_writes, smem_bank_stalls,
               l2cache_reads, l2cache_writes,
               l2cache_read_misses, l2cache_write_misses,
               l2cache_bank_stalls, l2cache_mshr_stalls,
               l3cache_reads, l3cache_writes,
               l3cache_read_misses, l3cache_write_misses,
               l3cache_bank_stalls, l3cache_mshr_stalls,
               mem_reads, mem_writes, mem_latency
    );

endinterface

209
hw/rtl/mem/VX_mem_unit.sv Normal file
View File

@@ -0,0 +1,209 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// NOTE(review): SMEM_ADDR_STACK_OPT is not referenced anywhere in this module;
// presumably it is consumed by files compiled after this one - confirm.
`define SMEM_ADDR_STACK_OPT
// Cluster-level memory unit.
//
// Instantiates an instruction-cache cluster, a data-cache cluster and an L2
// cache wrapper. The memory sides of the two L1 clusters feed the L2 core-side
// bus array (index 0 = I-cache, index 1 = D-cache); the L2 memory side is
// exported on mem_bus_if.
//
// Parameters:
//   CLUSTER_ID - only used to build the INSTANCE_ID strings of the caches
//
// Ports:
//   mem_perf_if   - performance counters (present only with PERF_ENABLE)
//   icache_bus_if - one instruction-fetch bus per socket
//   dcache_bus_if - DCACHE_NUM_REQS data buses per socket
//   mem_bus_if    - memory-side bus of the L2 cache wrapper
module VX_mem_unit import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_mem_perf_if.master mem_perf_if,
`endif
VX_mem_bus_if.slave icache_bus_if [`NUM_SOCKETS],
VX_mem_bus_if.slave dcache_bus_if [`NUM_SOCKETS * DCACHE_NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
`ifdef PERF_ENABLE
// per-cache counter bundles, filled in by the cache instances below
VX_cache_perf_if perf_icache_if();
VX_cache_perf_if perf_dcache_if();
VX_cache_perf_if perf_l2cache_if();
`endif
/////////////////////////////// I-Cache ///////////////////////////////////
// memory-side bus of the I-cache cluster (feeds the L2 below)
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_TAG_WIDTH)
) icache_mem_bus_if();
// icache_reset is the reset used by the I-cache instance
// (presumably a relayed copy of `reset` created by this macro - confirm)
`RESET_RELAY (icache_reset, reset);
// read-only cache (WRITE_ENABLE = 0), single bank, one request per input
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("cluster%0d-icache", CLUSTER_ID)),
.NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`NUM_SOCKETS),
.TAG_SEL_IDX (0),
.CACHE_SIZE (`ICACHE_SIZE),
.LINE_SIZE (ICACHE_LINE_SIZE),
.NUM_BANKS (1),
.NUM_WAYS (`ICACHE_NUM_WAYS),
.WORD_SIZE (ICACHE_WORD_SIZE),
.NUM_REQS (1),
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.TAG_WIDTH (ICACHE_ARB_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (0),
.CORE_OUT_REG (2),
.MEM_OUT_REG (2)
) icache (
`ifdef PERF_ENABLE
.cache_perf_if (perf_icache_if),
`endif
.clk (clk),
.reset (icache_reset),
.core_bus_if (icache_bus_if),
.mem_bus_if (icache_mem_bus_if)
);
/////////////////////////////// D-Cache ///////////////////////////////////
// memory-side bus of the D-cache cluster (feeds the L2 below)
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH)
) dcache_mem_bus_if();
// dcache_reset is the reset used by the D-cache instance
`RESET_RELAY (dcache_reset, reset);
// writable cache (WRITE_ENABLE = 1) with NC_ENABLE set;
// CORE_OUT_REG depends on `SM_ENABLED
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("cluster%0d-dcache", CLUSTER_ID)),
.NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`NUM_SOCKETS),
.TAG_SEL_IDX (1),
.CACHE_SIZE (`DCACHE_SIZE),
.LINE_SIZE (DCACHE_LINE_SIZE),
.NUM_BANKS (`DCACHE_NUM_BANKS),
.NUM_WAYS (`DCACHE_NUM_WAYS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.NUM_REQS (DCACHE_NUM_REQS),
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1),
.NC_ENABLE (1),
.CORE_OUT_REG (`SM_ENABLED ? 2 : 1),
.MEM_OUT_REG (2)
) dcache (
`ifdef PERF_ENABLE
.cache_perf_if (perf_dcache_if),
`endif
.clk (clk),
.reset (dcache_reset),
.core_bus_if (dcache_bus_if),
.mem_bus_if (dcache_mem_bus_if)
);
/////////////////////////////// L2-Cache //////////////////////////////////
// core-side request ports of the L2 cache
VX_mem_bus_if #(
.DATA_SIZE (L2_WORD_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH)
) l2_mem_bus_if[L2_NUM_REQS]();
// fixed slot assignment on the L2 core side: I-cache first, D-cache second
localparam I_MEM_ARB_IDX = 0;
localparam D_MEM_ARB_IDX = I_MEM_ARB_IDX + 1;
// presumably connects each L1 memory bus to its L2 slot while adapting the
// tag from the L1-specific width to L1_MEM_TAG_WIDTH - confirm against the macro
`ASSIGN_VX_MEM_BUS_IF_X (l2_mem_bus_if[I_MEM_ARB_IDX], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
`ASSIGN_VX_MEM_BUS_IF_X (l2_mem_bus_if[D_MEM_ARB_IDX], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
// l2_reset is the reset used by the L2 instance
`RESET_RELAY (l2_reset, reset);
// PASSTHRU is set when `L2_ENABLED is false
// (presumably the wrapper then bypasses the cache - confirm)
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("cluster%0d-l2cache", CLUSTER_ID)),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_WAYS (`L2_NUM_WAYS),
.WORD_SIZE (L2_WORD_SIZE),
.NUM_REQS (L2_NUM_REQS),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_REG (2),
.MEM_OUT_REG (2),
.NC_ENABLE (1),
.PASSTHRU (!`L2_ENABLED)
) l2cache (
.clk (clk),
.reset (l2_reset),
`ifdef PERF_ENABLE
.cache_perf_if (perf_l2cache_if),
`endif
.core_bus_if (l2_mem_bus_if),
.mem_bus_if (mem_bus_if)
);
`ifdef PERF_ENABLE
// these two D-cache counters are not exported through mem_perf_if
`UNUSED_VAR (perf_dcache_if.mem_stalls)
`UNUSED_VAR (perf_dcache_if.crsp_stalls)
// L1 counters are forwarded unchanged
assign mem_perf_if.icache_reads = perf_icache_if.reads;
assign mem_perf_if.icache_read_misses = perf_icache_if.read_misses;
assign mem_perf_if.dcache_reads = perf_dcache_if.reads;
assign mem_perf_if.dcache_writes = perf_dcache_if.writes;
assign mem_perf_if.dcache_read_misses = perf_dcache_if.read_misses;
assign mem_perf_if.dcache_write_misses= perf_dcache_if.write_misses;
assign mem_perf_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
assign mem_perf_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
// L2 counters are forwarded only when L2_ENABLE is defined, otherwise tied to zero
`ifdef L2_ENABLE
assign mem_perf_if.l2cache_reads = perf_l2cache_if.reads;
assign mem_perf_if.l2cache_writes = perf_l2cache_if.writes;
assign mem_perf_if.l2cache_read_misses = perf_l2cache_if.read_misses;
assign mem_perf_if.l2cache_write_misses= perf_l2cache_if.write_misses;
assign mem_perf_if.l2cache_bank_stalls = perf_l2cache_if.bank_stalls;
assign mem_perf_if.l2cache_mshr_stalls = perf_l2cache_if.mshr_stalls;
`else
assign mem_perf_if.l2cache_reads = '0;
assign mem_perf_if.l2cache_writes = '0;
assign mem_perf_if.l2cache_read_misses = '0;
assign mem_perf_if.l2cache_write_misses= '0;
assign mem_perf_if.l2cache_bank_stalls = '0;
assign mem_perf_if.l2cache_mshr_stalls = '0;
`endif
// L3 and main-memory counters are tied to zero at this level
// NOTE(review): the smem_* outputs of mem_perf_if are not assigned anywhere
// in this module; presumably they are driven or overridden elsewhere - confirm.
assign mem_perf_if.l3cache_reads = '0;
assign mem_perf_if.l3cache_writes = '0;
assign mem_perf_if.l3cache_read_misses = '0;
assign mem_perf_if.l3cache_write_misses= '0;
assign mem_perf_if.l3cache_bank_stalls = '0;
assign mem_perf_if.l3cache_mshr_stalls = '0;
assign mem_perf_if.mem_reads = '0;
assign mem_perf_if.mem_writes = '0;
assign mem_perf_if.mem_latency = '0;
`endif
endmodule

330
hw/rtl/mem/VX_shared_mem.sv Normal file
View File

@@ -0,0 +1,330 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Banked shared memory.
//
// NUM_REQS word requests per cycle are routed through a crossbar to NUM_BANKS
// single-port RAMs. The low address bits select the bank and the remaining
// bits address a word inside the bank, so consecutive word addresses fall in
// consecutive banks. Reads return {data, tag} through a second crossbar to
// the request lane they came from; writes produce no response.
module VX_shared_mem #(
// Instance name printed in DBG_TRACE_CACHE_BANK trace messages
parameter `STRING INSTANCE_ID = "",
// Size of the memory in bytes
parameter SIZE = (1024*16*8),
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Number of banks
parameter NUM_BANKS = 4,
// Address width
// NOTE(review): the static assert below requires
// ADDR_WIDTH == BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS), i.e. a word address.
// Assuming `CLOG2 is ceil(log2), this default only satisfies it when
// WORD_SIZE is 1, so instantiators presumably override it - confirm.
parameter ADDR_WIDTH = `CLOG2(SIZE),
// Size of a word in bytes
parameter WORD_SIZE = `XLEN/8,
// Request debug identifier
// (width of the UUID field taken from the top bits of the tag for tracing)
parameter UUID_WIDTH = 0,
// Request tag size
parameter TAG_WIDTH = 16
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_cache_perf_if.master cache_perf_if,
`endif
// Core request
input wire [NUM_REQS-1:0] req_valid,
input wire [NUM_REQS-1:0] req_rw,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] req_byteen,
input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] req_tag,
output wire [NUM_REQS-1:0] req_ready,
// Core response
output wire [NUM_REQS-1:0] rsp_valid,
output wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] rsp_data,
output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] rsp_tag,
input wire [NUM_REQS-1:0] rsp_ready
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (UUID_WIDTH)
// bits needed to name a request lane (`UP presumably clamps the width to >= 1 - confirm)
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam WORD_WIDTH = WORD_SIZE * 8;
// memory geometry, in words
localparam NUM_WORDS = SIZE / WORD_SIZE;
localparam WORDS_PER_BANK = NUM_WORDS / NUM_BANKS;
localparam BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
// flattened bank request : {rw, bank address, byteen, data, tag}
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
// flattened bank response: {data, tag}
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
// bank selection
// the bank index is the low BANK_SEL_BITS bits of the word address
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
if (NUM_BANKS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_idx[i] = req_addr[i][0 +: BANK_SEL_BITS];
end
end else begin
assign req_bank_idx = 0;
end
// bank addressing
// the address inside the bank is the bits just above the bank index
wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_addr[i] = req_addr[i][BANK_SEL_BITS +: BANK_ADDR_WIDTH];
end
// bank requests dispatch
wire [NUM_BANKS-1:0] per_bank_req_valid;
wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
// request lane that currently owns each bank (sel_out of the request crossbar)
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] req_data_out;
// pack each request; the unpack after the crossbar uses the same field order
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_data_in[i] = {
req_rw[i],
req_bank_addr[i],
req_byteen[i],
req_data[i],
req_tag[i]};
end
// request crossbar: lanes -> banks, steered by req_bank_idx;
// its collisions output feeds the bank_stalls counter when PERF_ENABLE is set
VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS),
.OUT_REG (3) // output should be registered for the data_store addressing
) req_xbar (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.collisions (cache_perf_if.bank_stalls),
`else
`UNUSED_PIN (collisions),
`endif
.valid_in (req_valid),
.data_in (req_data_in),
.sel_in (req_bank_idx),
.ready_in (req_ready),
.valid_out (per_bank_req_valid),
.data_out (req_data_out),
.sel_out (per_bank_req_idx),
.ready_out (per_bank_req_ready)
);
// unpack the per-bank request
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign {
per_bank_req_rw[i],
per_bank_req_addr[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_tag[i]} = req_data_out[i];
end
// banks access
wire [NUM_BANKS-1:0] per_bank_rsp_valid;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
// one single-port RAM per bank; read is permanently enabled and a write
// happens only when a write request is actually accepted
VX_sp_ram #(
.DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE)
) data_store (
.clk (clk),
.read (1'b1),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i])
);
// drop write response
// only reads enter the response buffer; writes are always accepted
// regardless of the buffer's ready
wire per_bank_req_valid_w, per_bank_req_ready_w;
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
// carries the originating lane index and tag alongside the read
// NOTE(review): SIZE = 0 presumably makes this buffer a pass-through - confirm
VX_elastic_buffer #(
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
.SIZE (0)
) bank_buf (
.clk (clk),
.reset (reset),
.valid_in (per_bank_req_valid_w),
.ready_in (per_bank_req_ready_w),
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
.valid_out (per_bank_rsp_valid[i]),
.ready_out (per_bank_rsp_ready[i])
);
end
// bank responses gather
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_data_in[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end
// response crossbar: banks -> lanes, steered by the saved lane index
VX_stream_xbar #(
.NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.OUT_REG (2)
) rsp_xbar (
.clk (clk),
.reset (reset),
`UNUSED_PIN (collisions),
.sel_in (per_bank_rsp_idx),
.valid_in (per_bank_rsp_valid),
.ready_in (per_bank_rsp_ready),
.data_in (rsp_data_in),
.data_out (rsp_data_out),
.valid_out (rsp_valid),
.ready_out (rsp_ready),
`UNUSED_PIN (sel_out)
);
// unpack the per-lane response
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign {rsp_data[i], rsp_tag[i]} = rsp_data_out[i];
end
`ifdef PERF_ENABLE
// per cycle: reads, writes
// plus the number of lanes holding a response that is not being accepted
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw;
wire [NUM_REQS-1:0] perf_writes_per_req = req_valid & req_ready & req_rw;
wire [NUM_REQS-1:0] perf_crsp_stall_per_req = rsp_valid & ~rsp_ready;
`POP_COUNT(perf_reads_per_cycle, perf_reads_per_req);
`POP_COUNT(perf_writes_per_cycle, perf_writes_per_req);
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
// running totals, cleared by reset
reg [`PERF_CTR_BITS-1:0] perf_reads;
reg [`PERF_CTR_BITS-1:0] perf_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
perf_reads <= '0;
perf_writes <= '0;
perf_crsp_stalls <= '0;
end else begin
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle);
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end
// miss, MSHR and memory-stall counters are tied to zero here;
// bank_stalls is driven by the request crossbar above
assign cache_perf_if.reads = perf_reads;
assign cache_perf_if.writes = perf_writes;
assign cache_perf_if.read_misses = '0;
assign cache_perf_if.write_misses = '0;
assign cache_perf_if.mshr_stalls = '0;
assign cache_perf_if.mem_stalls = '0;
assign cache_perf_if.crsp_stalls = perf_crsp_stalls;
`endif
`ifdef DBG_TRACE_CACHE_BANK
// UUID = top UUID_WIDTH bits of the tag, or zero when UUID_WIDTH is 0
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid;
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (UUID_WIDTH != 0) begin
assign req_uuid[i] = req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_uuid[i] = rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid[i] = 0;
assign rsp_uuid[i] = 0;
end
end
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid;
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (UUID_WIDTH != 0) begin
assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign per_bank_rsp_uuid[i] = per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign per_bank_req_uuid[i] = 0;
assign per_bank_rsp_uuid[i] = 0;
end
end
// trace every accepted request / response, first per lane, then per bank
always @(posedge clk) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (req_valid[i] && req_ready[i]) begin
if (req_rw[i]) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_byteen[i], req_data[i], req_uuid[i]));
end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_uuid[i]));
end
end
if (rsp_valid[i] && rsp_ready[i]) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, rsp_tag[i], rsp_data[i], rsp_uuid[i]));
end
end
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
end
end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end
end
end
`endif
endmodule

View File

@@ -0,0 +1,130 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// One-to-many memory bus switch.
//
// Request path : a select field of LOG_NUM_REQS bits at position
//                TAG_SEL_IDX of the incoming tag picks one of NUM_REQS
//                output buses; the field is removed from the tag that is
//                forwarded.
// Response path: responses from the output buses are arbitrated back onto
//                the input bus, and the index of the granted output is
//                inserted into the tag at the same position.
//
// Parameters:
//   NUM_REQS                  - number of output buses
//   DATA_SIZE                 - data width in bytes (also the byte-enable width)
//   TAG_WIDTH                 - tag width on the input bus (select field included)
//   MEM_ADDR_WIDTH            - used to derive the address field width
//   TAG_SEL_IDX               - bit position of the select field in the input tag
//   OUT_REG_REQ / OUT_REG_RSP - OUT_REG setting of the request switch / response arbiter
//   ARBITER                   - ARBITER setting of the response VX_stream_arb
module VX_smem_switch #(
    parameter NUM_REQS        = 1,
    parameter DATA_SIZE       = 1,
    parameter TAG_WIDTH       = 1,
    parameter MEM_ADDR_WIDTH  = `MEM_ADDR_WIDTH,
    parameter TAG_SEL_IDX     = 0,
    parameter OUT_REG_REQ     = 0,
    parameter OUT_REG_RSP     = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,

    VX_mem_bus_if.slave     bus_in_if,
    VX_mem_bus_if.master    bus_out_if [NUM_REQS]
);
    localparam ADDR_WIDTH    = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
    localparam DATA_WIDTH    = (8 * DATA_SIZE);
    localparam LOG_NUM_REQS  = `CLOG2(NUM_REQS);
    // tag width on the output buses (select field removed)
    localparam TAG_OUT_WIDTH = TAG_WIDTH - LOG_NUM_REQS;
    // flattened request : {tag, addr, rw, byteen, data}
    localparam REQ_DATAW     = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    // flattened response: {tag, data}
    localparam RSP_DATAW     = TAG_OUT_WIDTH + DATA_WIDTH;

    // ---------------------------------------------------------------
    // request path
    // ---------------------------------------------------------------

    // output side of the request switch
    wire [NUM_REQS-1:0]                 out_req_valid;
    wire [NUM_REQS-1:0][REQ_DATAW-1:0]  out_req_data;
    wire [NUM_REQS-1:0]                 out_req_ready;

    // input side of the request switch
    wire [REQ_DATAW-1:0]                in_req_data;
    wire [TAG_OUT_WIDTH-1:0]            in_req_tag;
    wire [`UP(LOG_NUM_REQS)-1:0]        in_req_sel;

    // take the select field out of the incoming tag
    VX_bits_remove #(
        .N   (TAG_WIDTH),
        .S   (LOG_NUM_REQS),
        .POS (TAG_SEL_IDX)
    ) bits_remove (
        .data_in  (bus_in_if.req_data.tag),
        .data_out (in_req_tag)
    );

    // with a single output there is no select field to read
    if (NUM_REQS > 1) begin
        assign in_req_sel = bus_in_if.req_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS];
    end else begin
        assign in_req_sel = '0;
    end

    assign in_req_data = {
        in_req_tag,
        bus_in_if.req_data.addr,
        bus_in_if.req_data.rw,
        bus_in_if.req_data.byteen,
        bus_in_if.req_data.data
    };

    VX_stream_switch #(
        .NUM_OUTPUTS (NUM_REQS),
        .DATAW       (REQ_DATAW),
        .OUT_REG     (OUT_REG_REQ)
    ) req_switch (
        .clk       (clk),
        .reset     (reset),
        .sel_in    (in_req_sel),
        .valid_in  (bus_in_if.req_valid),
        .data_in   (in_req_data),
        .ready_in  (bus_in_if.req_ready),
        .valid_out (out_req_valid),
        .data_out  (out_req_data),
        .ready_out (out_req_ready)
    );

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign bus_out_if[i].req_valid = out_req_valid[i];
        assign {
            bus_out_if[i].req_data.tag,
            bus_out_if[i].req_data.addr,
            bus_out_if[i].req_data.rw,
            bus_out_if[i].req_data.byteen,
            bus_out_if[i].req_data.data
        } = out_req_data[i];
        assign out_req_ready[i] = bus_out_if[i].req_ready;
    end

    ///////////////////////////////////////////////////////////////////////

    // ---------------------------------------------------------------
    // response path
    // ---------------------------------------------------------------

    // collected from the output buses
    wire [NUM_REQS-1:0]                 out_rsp_valid;
    wire [NUM_REQS-1:0][RSP_DATAW-1:0]  out_rsp_data;
    wire [NUM_REQS-1:0]                 out_rsp_ready;

    // arbitration result, headed for the input bus
    wire [RSP_DATAW-1:0]                in_rsp_data;
    wire [TAG_OUT_WIDTH-1:0]            in_rsp_tag;
    wire [`UP(LOG_NUM_REQS)-1:0]        in_rsp_sel;

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign out_rsp_valid[i] = bus_out_if[i].rsp_valid;
        assign out_rsp_data[i]  = {
            bus_out_if[i].rsp_data.tag,
            bus_out_if[i].rsp_data.data
        };
        assign bus_out_if[i].rsp_ready = out_rsp_ready[i];
    end

    VX_stream_arb #(
        .NUM_INPUTS (NUM_REQS),
        .DATAW      (RSP_DATAW),
        .ARBITER    (ARBITER),
        .OUT_REG    (OUT_REG_RSP)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (out_rsp_valid),
        .data_in   (out_rsp_data),
        .ready_in  (out_rsp_ready),
        .valid_out (bus_in_if.rsp_valid),
        .data_out  (in_rsp_data),
        .sel_out   (in_rsp_sel),
        .ready_out (bus_in_if.rsp_ready)
    );

    // split the granted response into tag and data
    assign {in_rsp_tag, bus_in_if.rsp_data.data} = in_rsp_data;

    // put the index of the granted output back into the tag
    VX_bits_insert #(
        .N   (TAG_OUT_WIDTH),
        .S   (LOG_NUM_REQS),
        .POS (TAG_SEL_IDX)
    ) bits_insert (
        .data_in  (in_rsp_tag),
        .sel_in   (in_rsp_sel),
        .data_out (bus_in_if.rsp_data.tag)
    );
endmodule