Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactoring

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

79
hw/rtl/mem/VX_gbar_arb.sv Normal file
View File

@@ -0,0 +1,79 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global-barrier bus arbiter.
//
// Funnels NUM_REQS barrier request ports into a single upstream port through
// a VX_stream_arb instance, and fans the upstream response back out to every
// requester after one register stage.
module VX_gbar_arb #(
    parameter NUM_REQS        = 1,
    parameter OUT_REG         = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire clk,
    input wire reset,

    VX_gbar_bus_if.slave    bus_in_if [NUM_REQS],
    VX_gbar_bus_if.master   bus_out_if
);
    // Width of the packed request payload: {req_id, req_size_m1, req_core_id}
    localparam REQ_DATAW = `NB_WIDTH + `NC_WIDTH + `NC_WIDTH;

    // Flattened view of the request side of every input port
    wire [NUM_REQS-1:0]                 req_valid_in;
    wire [NUM_REQS-1:0][REQ_DATAW-1:0]  req_data_in;
    wire [NUM_REQS-1:0]                 req_ready_in;

    // Registered copy of the upstream response
    reg                     rsp_valid;
    reg [`NB_WIDTH-1:0]     rsp_id;

    // Request path: pick one requester and forward its payload upstream
    VX_stream_arb #(
        .NUM_INPUTS  (NUM_REQS),
        .NUM_OUTPUTS (1),
        .DATAW       (REQ_DATAW),
        .ARBITER     (ARBITER),
        .OUT_REG     (OUT_REG)
    ) req_arb (
        .clk        (clk),
        .reset      (reset),
        .valid_in   (req_valid_in),
        .data_in    (req_data_in),
        .ready_in   (req_ready_in),
        .valid_out  (bus_out_if.req_valid),
        .data_out   ({bus_out_if.req_id, bus_out_if.req_size_m1, bus_out_if.req_core_id}),
        .ready_out  (bus_out_if.req_ready),
        `UNUSED_PIN (sel_out)
    );

    // Response path: the valid flag is cleared by reset ...
    always @(posedge clk) begin
        if (reset) begin
            rsp_valid <= 0;
        end else begin
            rsp_valid <= bus_out_if.rsp_valid;
        end
    end

    // ... while the id is a plain pipeline register with no reset
    always @(posedge clk) begin
        rsp_id <= bus_out_if.rsp_id;
    end

    // Per-port wiring: pack the request, return ready, broadcast the response
    for (genvar r = 0; r < NUM_REQS; ++r) begin
        assign req_valid_in[r]        = bus_in_if[r].req_valid;
        assign req_data_in[r]         = {bus_in_if[r].req_id, bus_in_if[r].req_size_m1, bus_in_if[r].req_core_id};
        assign bus_in_if[r].req_ready = req_ready_in[r];

        assign bus_in_if[r].rsp_valid = rsp_valid;
        assign bus_in_if[r].rsp_id    = rsp_id;
    end

endmodule

View File

@@ -0,0 +1,49 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global-barrier bus: a valid/ready request channel carrying
// {req_id, req_size_m1, req_core_id} and a valid-only response channel
// carrying rsp_id (there is no response ready signal).
interface VX_gbar_bus_if ();

    // request channel
    wire                    req_valid;
    wire [`NB_WIDTH-1:0]    req_id;
    wire [`NC_WIDTH-1:0]    req_size_m1;
    wire [`NC_WIDTH-1:0]    req_core_id;
    wire                    req_ready;

    // response channel
    wire                    rsp_valid;
    wire [`NB_WIDTH-1:0]    rsp_id;

    // requester side: drives the request, observes ready and the response
    modport master (
        output req_valid, req_id, req_size_m1, req_core_id,
        input  req_ready, rsp_valid, rsp_id
    );

    // responder side: observes the request, drives ready and the response
    modport slave (
        input  req_valid, req_id, req_size_m1, req_core_id,
        output req_ready, rsp_valid, rsp_id
    );

endinterface

View File

@@ -0,0 +1,72 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Global barrier unit.
//
// Keeps one arrival bitmask (one bit per core) for every barrier id. Each
// accepted request either records the requesting core in the mask of the
// addressed barrier or, when the number of cores already recorded equals
// req_size_m1, clears the mask and raises a one-cycle response carrying the
// barrier id.
//
// Parameters:
//   INSTANCE_ID - name used only in the DBG_TRACE_GBAR trace messages.
// Ports:
//   clk, reset  - clock and synchronous reset (clears all masks and rsp_valid).
//   gbar_bus_if - barrier request/response bus; req_ready is tied high.
module VX_gbar_unit #(
    parameter `STRING INSTANCE_ID = ""
) (
    input wire clk,
    input wire reset,

    VX_gbar_bus_if.slave gbar_bus_if
);
    `UNUSED_SPARAM (INSTANCE_ID)

    // req_id is `NB_WIDTH bits wide, so it can address (1 << `NB_WIDTH)
    // distinct barriers. The mask table must have that many rows; sizing it
    // with `NB_WIDTH rows (the id *bit width*) left every id >= `NB_WIDTH
    // out of range: reads returned X and writes were dropped, so such a
    // barrier could never be released.
    localparam NUM_BAR_SLOTS = (1 << `NB_WIDTH);

    reg [NUM_BAR_SLOTS-1:0][`NUM_CORES-1:0] barrier_masks;

    // Number of cores already waiting on the barrier addressed by the request
    wire [`CLOG2(`NUM_CORES+1)-1:0] active_barrier_count;
    wire [`NUM_CORES-1:0] curr_barrier_mask = barrier_masks[gbar_bus_if.req_id];
    `POP_COUNT(active_barrier_count, curr_barrier_mask);
    // only the low `NC_WIDTH bits of the count are compared below
    `UNUSED_VAR (active_barrier_count)

    reg rsp_valid;
    reg [`NB_WIDTH-1:0] rsp_bar_id; // not reset; only meaningful while rsp_valid is high

    always @(posedge clk) begin
        if (reset) begin
            barrier_masks <= '0;
            rsp_valid     <= 0;
        end else begin
            // response is a single-cycle pulse unless re-armed below
            if (rsp_valid) begin
                rsp_valid <= 0;
            end
            if (gbar_bus_if.req_valid) begin
                if (active_barrier_count[`NC_WIDTH-1:0] == gbar_bus_if.req_size_m1) begin
                    // this request completes the barrier: release and reset the mask
                    barrier_masks[gbar_bus_if.req_id] <= '0;
                    rsp_bar_id <= gbar_bus_if.req_id;
                    rsp_valid  <= 1;
                end else begin
                    // still waiting for more cores: record this arrival
                    barrier_masks[gbar_bus_if.req_id][gbar_bus_if.req_core_id] <= 1;
                end
            end
        end
    end

    assign gbar_bus_if.rsp_valid = rsp_valid;
    assign gbar_bus_if.rsp_id    = rsp_bar_id;
    assign gbar_bus_if.req_ready = 1; // global barrier unit is always ready (no dependencies)

`ifdef DBG_TRACE_GBAR
    always @(posedge clk) begin
        if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
            `TRACE(1, ("%d: %s-acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
                $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id));
        end
        if (gbar_bus_if.rsp_valid) begin
            `TRACE(1, ("%d: %s-release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id));
        end
    end
`endif

endmodule

177
hw/rtl/mem/VX_mem_arb.sv Normal file
View File

@@ -0,0 +1,177 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Memory bus arbiter.
//
// Request path : NUM_INPUTS request streams are arbitrated onto NUM_OUTPUTS
//                streams by a VX_stream_arb; the arbiter's select output is
//                inserted into the outgoing tag at bit position TAG_SEL_IDX.
// Response path: when NUM_INPUTS > NUM_OUTPUTS the select bits are taken back
//                out of the response tag and used to steer the response to
//                the originating input through a VX_stream_switch; otherwise
//                (NUM_INPUTS == NUM_OUTPUTS, enforced by the static assert)
//                responses are passed through a VX_stream_arb.
//
// Parameters:
//   NUM_INPUTS / NUM_OUTPUTS - port counts; NUM_INPUTS >= NUM_OUTPUTS.
//   DATA_SIZE                - data width in bytes.
//   MEM_ADDR_WIDTH           - byte address width used to derive ADDR_WIDTH.
//   ADDR_WIDTH               - address width carried on the bus.
//   TAG_WIDTH                - tag width on the input side.
//   TAG_SEL_IDX              - bit position where the select bits live in the
//                              output-side tag.
//   OUT_REG_REQ / OUT_REG_RSP- forwarded to the OUT_REG parameter of the
//                              request / response stream blocks.
//   ARBITER                  - forwarded to VX_stream_arb.
module VX_mem_arb #(
    parameter NUM_INPUTS     = 1,
    parameter NUM_OUTPUTS    = 1,
    parameter DATA_SIZE      = 1,
    parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
    parameter ADDR_WIDTH     = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)),
    parameter TAG_WIDTH      = 1,
    parameter TAG_SEL_IDX    = 0,
    parameter OUT_REG_REQ    = 0,
    parameter OUT_REG_RSP    = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,

    VX_mem_bus_if.slave     bus_in_if [NUM_INPUTS],
    VX_mem_bus_if.master    bus_out_if [NUM_OUTPUTS]
);
    localparam DATA_WIDTH   = (8 * DATA_SIZE);
    localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
    // packed request: {tag, addr, rw, byteen, data}
    localparam REQ_DATAW    = TAG_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    // packed response: {tag, data}
    localparam RSP_DATAW    = TAG_WIDTH + DATA_WIDTH;

    `STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter"))

    wire [NUM_INPUTS-1:0]                 req_valid_in;
    wire [NUM_INPUTS-1:0][REQ_DATAW-1:0]  req_data_in;
    wire [NUM_INPUTS-1:0]                 req_ready_in;

    wire [NUM_OUTPUTS-1:0]                req_valid_out;
    wire [NUM_OUTPUTS-1:0][REQ_DATAW-1:0] req_data_out;
    wire [NUM_OUTPUTS-1:0][`UP(LOG_NUM_REQS)-1:0] req_sel_out;
    wire [NUM_OUTPUTS-1:0]                req_ready_out;

    // flatten the request side of every input port
    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        assign req_valid_in[i] = bus_in_if[i].req_valid;
        assign req_data_in[i] = {bus_in_if[i].req_data.tag, bus_in_if[i].req_data.addr, bus_in_if[i].req_data.rw, bus_in_if[i].req_data.byteen, bus_in_if[i].req_data.data};
        assign bus_in_if[i].req_ready = req_ready_in[i];
    end

    VX_stream_arb #(
        .NUM_INPUTS  (NUM_INPUTS),
        .NUM_OUTPUTS (NUM_OUTPUTS),
        .DATAW       (REQ_DATAW),
        .ARBITER     (ARBITER),
        .OUT_REG     (OUT_REG_REQ)
    ) req_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (req_valid_in),
        .ready_in  (req_ready_in),
        .data_in   (req_data_in),
        .data_out  (req_data_out),
        .sel_out   (req_sel_out),
        .valid_out (req_valid_out),
        .ready_out (req_ready_out)
    );

    // unpack each arbitrated request and splice the select bits into its tag
    for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
        wire [TAG_WIDTH-1:0] req_tag_out;
        VX_bits_insert #(
            .N   (TAG_WIDTH),
            .S   (LOG_NUM_REQS),
            .POS (TAG_SEL_IDX)
        ) bits_insert (
            .data_in  (req_tag_out),
            .sel_in   (req_sel_out[i]),
            .data_out (bus_out_if[i].req_data.tag)
        );
        assign bus_out_if[i].req_valid = req_valid_out[i];
        assign {req_tag_out, bus_out_if[i].req_data.addr, bus_out_if[i].req_data.rw, bus_out_if[i].req_data.byteen, bus_out_if[i].req_data.data} = req_data_out[i];
        assign req_ready_out[i] = bus_out_if[i].req_ready;
    end

    ///////////////////////////////////////////////////////////////////////////

    wire [NUM_INPUTS-1:0]                 rsp_valid_out;
    wire [NUM_INPUTS-1:0][RSP_DATAW-1:0]  rsp_data_out;
    wire [NUM_INPUTS-1:0]                 rsp_ready_out;

    wire [NUM_OUTPUTS-1:0]                rsp_valid_in;
    wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in;
    wire [NUM_OUTPUTS-1:0]                rsp_ready_in;

    if (NUM_INPUTS > NUM_OUTPUTS) begin

        // responses carry the select bits in their tag: strip them and use
        // them to route each response back to the input that issued it
        wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in;

        for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
            wire [TAG_WIDTH-1:0] rsp_tag_out;
            VX_bits_remove #(
                .N   (TAG_WIDTH + LOG_NUM_REQS),
                .S   (LOG_NUM_REQS),
                .POS (TAG_SEL_IDX)
            ) bits_remove (
                .data_in  (bus_out_if[i].rsp_data.tag),
                .data_out (rsp_tag_out)
            );

            assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
            assign rsp_data_in[i] = {rsp_tag_out, bus_out_if[i].rsp_data.data};
            assign bus_out_if[i].rsp_ready = rsp_ready_in[i];

            if (NUM_INPUTS > 1) begin
                assign rsp_sel_in[i] = bus_out_if[i].rsp_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS];
            end else begin
                assign rsp_sel_in[i] = '0;
            end
        end

        VX_stream_switch #(
            .NUM_INPUTS  (NUM_OUTPUTS),
            .NUM_OUTPUTS (NUM_INPUTS),
            .DATAW       (RSP_DATAW),
            .OUT_REG     (OUT_REG_RSP)
        ) rsp_switch (
            .clk       (clk),
            .reset     (reset),
            .sel_in    (rsp_sel_in),
            .valid_in  (rsp_valid_in),
            .ready_in  (rsp_ready_in),
            .data_in   (rsp_data_in),
            .data_out  (rsp_data_out),
            .valid_out (rsp_valid_out),
            .ready_out (rsp_ready_out)
        );

    end else begin

        // equal port counts: no select bits were added, tags pass unchanged
        for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
            assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
            assign rsp_data_in[i] = {bus_out_if[i].rsp_data.tag, bus_out_if[i].rsp_data.data};
            assign bus_out_if[i].rsp_ready = rsp_ready_in[i];
        end

        // Named rsp_arb (was req_arb, duplicating the request arbiter's name)
        // so hierarchy paths and waveforms identify the response path.
        VX_stream_arb #(
            .NUM_INPUTS  (NUM_OUTPUTS),
            .NUM_OUTPUTS (NUM_INPUTS),
            .DATAW       (RSP_DATAW),
            .ARBITER     (ARBITER),
            .OUT_REG     (OUT_REG_RSP)
        ) rsp_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (rsp_valid_in),
            .ready_in  (rsp_ready_in),
            .data_in   (rsp_data_in),
            .data_out  (rsp_data_out),
            .valid_out (rsp_valid_out),
            .ready_out (rsp_ready_out),
            `UNUSED_PIN (sel_out)
        );

    end

    // unpack the routed responses onto the input-side ports
    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        assign bus_in_if[i].rsp_valid = rsp_valid_out[i];
        assign {bus_in_if[i].rsp_data.tag, bus_in_if[i].rsp_data.data} = rsp_data_out[i];
        assign rsp_ready_out[i] = bus_in_if[i].rsp_ready;
    end

endmodule

View File

@@ -0,0 +1,64 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Memory bus: a valid/ready request channel and a valid/ready response
// channel, each carrying a packed struct payload.
interface VX_mem_bus_if #(
    parameter DATA_SIZE      = 1,
    parameter TAG_WIDTH      = 1,
    parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
    parameter ADDR_WIDTH     = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE)
) ();

    // Request payload (field order defines the packed bit layout)
    typedef struct packed {
        logic                   rw;
        logic [DATA_SIZE-1:0]   byteen;
        logic [ADDR_WIDTH-1:0]  addr;
        logic [DATA_SIZE*8-1:0] data;
        logic [TAG_WIDTH-1:0]   tag;
    } req_data_t;

    // Response payload (field order defines the packed bit layout)
    typedef struct packed {
        logic [DATA_SIZE*8-1:0] data;
        logic [TAG_WIDTH-1:0]   tag;
    } rsp_data_t;

    // request channel
    logic       req_valid;
    req_data_t  req_data;
    logic       req_ready;

    // response channel
    logic       rsp_valid;
    rsp_data_t  rsp_data;
    logic       rsp_ready;

    // requester side: drives requests and response back-pressure
    modport master (
        output req_valid, req_data, rsp_ready,
        input  req_ready, rsp_valid, rsp_data
    );

    // responder side: drives responses and request back-pressure
    modport slave (
        input  req_valid, req_data, rsp_ready,
        output req_ready, rsp_valid, rsp_data
    );

endinterface

View File

@@ -0,0 +1,43 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Bundle of memory-subsystem performance records: one cache_perf_t per
// cache-like unit plus one mem_perf_t (both types come from VX_gpu_pkg).
interface VX_mem_perf_if import VX_gpu_pkg::*; ();

    // per-unit cache counters
    cache_perf_t icache;
    cache_perf_t dcache;
    cache_perf_t l2cache;
    cache_perf_t l3cache;
    cache_perf_t smem;

    // memory counters
    mem_perf_t   mem;

    // producer side: drives every record
    modport master (
        output icache, dcache, l2cache, l3cache, smem, mem
    );

    // consumer side: reads every record
    modport slave (
        input icache, dcache, l2cache, l3cache, smem, mem
    );

endinterface

335
hw/rtl/mem/VX_shared_mem.sv Normal file
View File

@@ -0,0 +1,335 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Banked shared memory.
//
// NUM_REQS word-granular request lanes are routed through a crossbar to
// NUM_BANKS single-port RAMs (bank index = low address bits). Read responses
// are routed back to the issuing lane through a second crossbar; writes
// produce no response.
module VX_shared_mem import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
// Size of cache in bytes
// NOTE(review): the default ADDR_WIDTH below is CLOG2(SIZE), but the
// STATIC_ASSERT in the body requires ADDR_WIDTH to equal
// CLOG2(SIZE / WORD_SIZE / NUM_BANKS) + CLOG2(NUM_BANKS), i.e. a word address
// width. With the defaults shown here the two do not appear to match, so
// instantiators presumably always override ADDR_WIDTH - confirm.
parameter SIZE = (1024*16*8),
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Number of banks
parameter NUM_BANKS = 4,
// Address width
parameter ADDR_WIDTH = `CLOG2(SIZE),
// Size of a word in bytes
parameter WORD_SIZE = `XLEN/8,
// Request debug identifier
// (when non-zero, the top UUID_WIDTH bits of the tag are printed in traces)
parameter UUID_WIDTH = 0,
// Request tag size
parameter TAG_WIDTH = 16
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
input wire [NUM_REQS-1:0] req_valid,
input wire [NUM_REQS-1:0] req_rw,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] req_byteen,
input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] req_tag,
output wire [NUM_REQS-1:0] req_ready,
// Core response
output wire [NUM_REQS-1:0] rsp_valid,
output wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] rsp_data,
output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] rsp_tag,
input wire [NUM_REQS-1:0] rsp_ready
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (UUID_WIDTH)
// Derived geometry. The *_WIDTH variants are the *_BITS values passed through
// the `UP macro and are the ones used to size signals.
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam WORD_WIDTH = WORD_SIZE * 8;
localparam NUM_WORDS = SIZE / WORD_SIZE;
localparam WORDS_PER_BANK = NUM_WORDS / NUM_BANKS;
localparam BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
// packed bank request: {rw, bank address, byteen, data, tag}
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
// packed bank response: {data, tag}
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
// req_addr must be exactly {bank address, bank select}
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
// bank selection
// (the low BANK_SEL_BITS address bits pick the bank; constant 0 for one bank)
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
if (NUM_BANKS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_idx[i] = req_addr[i][0 +: BANK_SEL_BITS];
end
end else begin
assign req_bank_idx = 0;
end
// bank addressing
// (the address bits above the bank select form the word address in the bank)
wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_addr[i] = req_addr[i][BANK_SEL_BITS +: BANK_ADDR_WIDTH];
end
// bank requests dispatch
wire [NUM_BANKS-1:0] per_bank_req_valid;
wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
// index of the request lane that won each bank (crossbar sel_out)
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] req_data_out;
`ifdef PERF_ENABLE
wire [`PERF_CTR_BITS-1:0] perf_collisions;
`endif
// pack each lane's request; the unpack after the crossbar uses the same order
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_data_in[i] = {
req_rw[i],
req_bank_addr[i],
req_byteen[i],
req_data[i],
req_tag[i]};
end
// request crossbar: lanes -> banks, steered by req_bank_idx
VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS),
.OUT_REG (3) // output should be registered for the data_store addressing
) req_xbar (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.collisions (perf_collisions),
`else
`UNUSED_PIN (collisions),
`endif
.valid_in (req_valid),
.data_in (req_data_in),
.sel_in (req_bank_idx),
.ready_in (req_ready),
.valid_out (per_bank_req_valid),
.data_out (req_data_out),
.sel_out (per_bank_req_idx),
.ready_out (per_bank_req_ready)
);
// unpack the per-bank request fields
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign {
per_bank_req_rw[i],
per_bank_req_addr[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_tag[i]} = req_data_out[i];
end
// banks access
wire [NUM_BANKS-1:0] per_bank_rsp_valid;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
// One RAM per bank. Read enable is tied high; a write happens only when
// a write request is accepted (valid && ready && rw).
VX_sp_ram #(
.DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE)
) data_store (
.clk (clk),
.read (1'b1),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i])
);
// drop write response
// (only reads are presented to bank_buf; writes are always accepted,
// independent of the response path back-pressure)
wire per_bank_req_valid_w, per_bank_req_ready_w;
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
// carries the requester index and tag of each read to the response side
VX_elastic_buffer #(
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
.SIZE (0)
) bank_buf (
.clk (clk),
.reset (reset),
.valid_in (per_bank_req_valid_w),
.ready_in (per_bank_req_ready_w),
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
.valid_out (per_bank_rsp_valid[i]),
.ready_out (per_bank_rsp_ready[i])
);
end
// bank responses gather
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_data_in[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end
// response crossbar: banks -> lanes, steered by the saved requester index
VX_stream_xbar #(
.NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.OUT_REG (2)
) rsp_xbar (
.clk (clk),
.reset (reset),
`UNUSED_PIN (collisions),
.sel_in (per_bank_rsp_idx),
.valid_in (per_bank_rsp_valid),
.ready_in (per_bank_rsp_ready),
.data_in (rsp_data_in),
.data_out (rsp_data_out),
.valid_out (rsp_valid),
.ready_out (rsp_ready),
`UNUSED_PIN (sel_out)
);
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign {rsp_data[i], rsp_tag[i]} = rsp_data_out[i];
end
`ifdef PERF_ENABLE
// per cycle: reads, writes
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
// per-lane event flags: accepted read, accepted write, stalled response
wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw;
wire [NUM_REQS-1:0] perf_writes_per_req = req_valid & req_ready & req_rw;
wire [NUM_REQS-1:0] perf_crsp_stall_per_req = rsp_valid & ~rsp_ready;
`POP_COUNT(perf_reads_per_cycle, perf_reads_per_req);
`POP_COUNT(perf_writes_per_cycle, perf_writes_per_req);
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
// running totals, cleared on reset
reg [`PERF_CTR_BITS-1:0] perf_reads;
reg [`PERF_CTR_BITS-1:0] perf_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
perf_reads <= '0;
perf_writes <= '0;
perf_crsp_stalls <= '0;
end else begin
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle);
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end
// export through the cache_perf_t record; the miss, MSHR and memory stall
// fields are driven to zero here, and bank_stalls reports the request
// crossbar's collision counter
assign cache_perf.reads = perf_reads;
assign cache_perf.writes = perf_writes;
assign cache_perf.read_misses = '0;
assign cache_perf.write_misses = '0;
assign cache_perf.bank_stalls = perf_collisions;
assign cache_perf.mshr_stalls = '0;
assign cache_perf.mem_stalls = '0;
assign cache_perf.crsp_stalls = perf_crsp_stalls;
`endif
`ifdef DBG_TRACE_CACHE_BANK
// debug-only: extract the UUID from the top of each tag (0 when UUID_WIDTH is 0)
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid;
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (UUID_WIDTH != 0) begin
assign req_uuid[i] = req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_uuid[i] = rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid[i] = 0;
assign rsp_uuid[i] = 0;
end
end
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid;
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (UUID_WIDTH != 0) begin
assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign per_bank_rsp_uuid[i] = per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign per_bank_req_uuid[i] = 0;
assign per_bank_rsp_uuid[i] = 0;
end
end
// trace every accepted lane request/response (level 1) and every accepted
// bank request/response (level 2)
always @(posedge clk) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (req_valid[i] && req_ready[i]) begin
if (req_rw[i]) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_byteen[i], req_data[i], req_uuid[i]));
end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_uuid[i]));
end
end
if (rsp_valid[i] && rsp_ready[i]) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, rsp_tag[i], rsp_data[i], rsp_uuid[i]));
end
end
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
end
end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end
end
end
`endif
endmodule

View File

@@ -0,0 +1,130 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Memory bus de-multiplexer.
//
// Request path : LOG_NUM_REQS select bits are read from the incoming tag at
//                TAG_SEL_IDX, removed from the tag, and used to steer the
//                request to one of NUM_REQS output ports (VX_stream_switch).
// Response path: responses from the output ports are arbitrated back onto
//                the single input port (VX_stream_arb) and the index of the
//                winning port is re-inserted into the tag at TAG_SEL_IDX.
module VX_smem_switch #(
    parameter NUM_REQS       = 1,
    parameter DATA_SIZE      = 1,
    parameter TAG_WIDTH      = 1,
    parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
    parameter TAG_SEL_IDX    = 0,
    parameter OUT_REG_REQ    = 0,
    parameter OUT_REG_RSP    = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,

    VX_mem_bus_if.slave     bus_in_if,
    VX_mem_bus_if.master    bus_out_if [NUM_REQS]
);
    localparam ADDR_WIDTH    = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
    localparam DATA_WIDTH    = (8 * DATA_SIZE);
    localparam LOG_NUM_REQS  = `CLOG2(NUM_REQS);
    // tag width seen on the output ports (select bits stripped)
    localparam TAG_OUT_WIDTH = TAG_WIDTH - LOG_NUM_REQS;
    // packed request: {tag, addr, rw, byteen, data}
    localparam REQ_DATAW     = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    // packed response: {tag, data}
    localparam RSP_DATAW     = TAG_OUT_WIDTH + DATA_WIDTH;

    // ---- request-side signals ----
    wire [REQ_DATAW-1:0]                req_data_in;
    wire [TAG_OUT_WIDTH-1:0]            req_tag_in;
    wire [`UP(LOG_NUM_REQS)-1:0]        req_sel_in;

    wire [NUM_REQS-1:0]                 req_valid_out;
    wire [NUM_REQS-1:0][REQ_DATAW-1:0]  req_data_out;
    wire [NUM_REQS-1:0]                 req_ready_out;

    // ---- response-side signals ----
    wire [NUM_REQS-1:0]                 rsp_valid_out;
    wire [NUM_REQS-1:0][RSP_DATAW-1:0]  rsp_data_out;
    wire [NUM_REQS-1:0]                 rsp_ready_out;

    wire [RSP_DATAW-1:0]                rsp_data_in;
    wire [TAG_OUT_WIDTH-1:0]            rsp_tag_in;
    wire [`UP(LOG_NUM_REQS)-1:0]        rsp_sel_in;

    ///////////////////////////////////////////////////////////////////////
    // request: strip the select bits and steer to the chosen output

    VX_bits_remove #(
        .N   (TAG_WIDTH),
        .S   (LOG_NUM_REQS),
        .POS (TAG_SEL_IDX)
    ) bits_remove (
        .data_in  (bus_in_if.req_data.tag),
        .data_out (req_tag_in)
    );

    if (NUM_REQS > 1) begin
        assign req_sel_in = bus_in_if.req_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS];
    end else begin
        assign req_sel_in = '0;
    end

    assign req_data_in = {req_tag_in, bus_in_if.req_data.addr, bus_in_if.req_data.rw, bus_in_if.req_data.byteen, bus_in_if.req_data.data};

    VX_stream_switch #(
        .NUM_OUTPUTS (NUM_REQS),
        .DATAW       (REQ_DATAW),
        .OUT_REG     (OUT_REG_REQ)
    ) req_switch (
        .clk       (clk),
        .reset     (reset),
        .sel_in    (req_sel_in),
        .valid_in  (bus_in_if.req_valid),
        .data_in   (req_data_in),
        .ready_in  (bus_in_if.req_ready),
        .valid_out (req_valid_out),
        .data_out  (req_data_out),
        .ready_out (req_ready_out)
    );

    ///////////////////////////////////////////////////////////////////////
    // response: arbitrate the outputs and restore the select bits

    VX_stream_arb #(
        .NUM_INPUTS (NUM_REQS),
        .DATAW      (RSP_DATAW),
        .ARBITER    (ARBITER),
        .OUT_REG    (OUT_REG_RSP)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (rsp_valid_out),
        .data_in   (rsp_data_out),
        .ready_in  (rsp_ready_out),
        .valid_out (bus_in_if.rsp_valid),
        .data_out  (rsp_data_in),
        .sel_out   (rsp_sel_in),
        .ready_out (bus_in_if.rsp_ready)
    );

    assign {rsp_tag_in, bus_in_if.rsp_data.data} = rsp_data_in;

    VX_bits_insert #(
        .N   (TAG_OUT_WIDTH),
        .S   (LOG_NUM_REQS),
        .POS (TAG_SEL_IDX)
    ) bits_insert (
        .data_in  (rsp_tag_in),
        .sel_in   (rsp_sel_in),
        .data_out (bus_in_if.rsp_data.tag)
    );

    ///////////////////////////////////////////////////////////////////////
    // per-output wiring for both directions

    for (genvar p = 0; p < NUM_REQS; ++p) begin
        // request: unpack the switched payload onto output port p
        assign bus_out_if[p].req_valid = req_valid_out[p];
        assign {bus_out_if[p].req_data.tag, bus_out_if[p].req_data.addr, bus_out_if[p].req_data.rw, bus_out_if[p].req_data.byteen, bus_out_if[p].req_data.data} = req_data_out[p];
        assign req_ready_out[p] = bus_out_if[p].req_ready;

        // response: pack output port p's response for the arbiter
        assign rsp_valid_out[p] = bus_out_if[p].rsp_valid;
        assign rsp_data_out[p]  = {bus_out_if[p].rsp_data.tag, bus_out_if[p].rsp_data.data};
        assign bus_out_if[p].rsp_ready = rsp_ready_out[p];
    end

endmodule