Files
vortex/hw/rtl/mem/VX_shared_mem.sv
Blaise Tine c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactoring

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00

336 lines
12 KiB
Systemverilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Banked shared memory.
//
// Each cycle up to NUM_REQS word requests are presented on the core request
// interface. A request crossbar (req_xbar) routes every request to one of
// NUM_BANKS VX_sp_ram instances, selected by the low-order bits of the request
// address. Read requests produce a response that is routed back to the
// originating request lane by a response crossbar (rsp_xbar); write requests
// are consumed by the bank and produce no response.
module VX_shared_mem import VX_gpu_pkg::*; #(
// Instance name; only used as a prefix in the debug trace messages
parameter `STRING INSTANCE_ID = "",
// Total size of the shared memory in bytes
parameter SIZE = (1024*16*8),
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Number of banks
parameter NUM_BANKS = 4,
// Address width (width of each req_addr entry).
// req_addr is decoded as {bank address, bank select} with no byte offset, and
// the static assertion below requires
// ADDR_WIDTH == CLOG2(SIZE / WORD_SIZE / NUM_BANKS) + CLOG2(NUM_BANKS).
// NOTE(review): assuming CLOG2 is ceil(log2) and power-of-two sizes, the
// default CLOG2(SIZE) only satisfies that assertion when WORD_SIZE == 1;
// instantiations presumably override ADDR_WIDTH - confirm.
parameter ADDR_WIDTH = `CLOG2(SIZE),
// Size of a word in bytes
parameter WORD_SIZE = `XLEN/8,
// Request debug identifier: number of most-significant tag bits that hold the
// request UUID (only read by the DBG_TRACE_CACHE_BANK trace logic)
parameter UUID_WIDTH = 0,
// Request tag size
parameter TAG_WIDTH = 16
) (
input wire clk,
input wire reset,
// PERF
// Performance counters (reads, writes, bank stalls, response stalls);
// the miss/mshr/mem fields are tied to zero further below.
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
// One entry per request lane: valid/ready handshake, rw (1 = write, 0 = read),
// word address, per-byte write enable, write data and an opaque tag.
input wire [NUM_REQS-1:0] req_valid,
input wire [NUM_REQS-1:0] req_rw,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] req_byteen,
input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] req_tag,
output wire [NUM_REQS-1:0] req_ready,
// Core response
// One entry per request lane: valid/ready handshake, read data and the tag
// of the read request being answered. Only reads generate a response.
output wire [NUM_REQS-1:0] rsp_valid,
output wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] rsp_data,
output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] rsp_tag,
input wire [NUM_REQS-1:0] rsp_ready
);
// INSTANCE_ID and UUID_WIDTH are only referenced by the optional trace logic
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (UUID_WIDTH)
// Bits needed to index a request lane; REQ_SEL_WIDTH is the declared width
// (UP presumably clamps a zero width to 1 - confirm against VX_define.vh)
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
// Word width in bits
localparam WORD_WIDTH = WORD_SIZE * 8;
// Total number of words, and number of words stored in each bank
localparam NUM_WORDS = SIZE / WORD_SIZE;
localparam WORDS_PER_BANK = NUM_WORDS / NUM_BANKS;
// Width of the word address inside a single bank
localparam BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK);
// Bits needed to index a bank; BANK_SEL_WIDTH is the declared width
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
// Payload carried through the request crossbar: {rw, bank address, byteen, data, tag}
localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
// Payload carried through the response crossbar: {data, tag}
localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
// The request address must split exactly into bank-address and bank-select bits
`STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter"))
// bank selection
// The low BANK_SEL_BITS bits of the address pick the bank, so consecutive
// word addresses map to consecutive banks. With a single bank the index is 0.
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;
if (NUM_BANKS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_idx[i] = req_addr[i][0 +: BANK_SEL_BITS];
end
end else begin
assign req_bank_idx = 0;
end
// bank addressing
// The address bits above the bank-select field form the word address
// inside the selected bank.
wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_addr[i] = req_addr[i][BANK_SEL_BITS +: BANK_ADDR_WIDTH];
end
// bank requests dispatch
// Per-bank view of the request selected by the request crossbar.
wire [NUM_BANKS-1:0] per_bank_req_valid;
wire [NUM_BANKS-1:0] per_bank_req_rw;
wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag;
// sel_out of req_xbar; later used as the response crossbar's output select,
// i.e. it identifies the request lane the response must return to
// (presumably the index of the granted input - confirm against VX_stream_xbar)
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready;
// Packed crossbar payloads (input side per request lane, output side per bank)
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] req_data_out;
`ifdef PERF_ENABLE
// Collision count reported by req_xbar; exported as cache_perf.bank_stalls
wire [`PERF_CTR_BITS-1:0] perf_collisions;
`endif
// Pack each request into a single crossbar payload. The field order here
// must match the unpacking after the crossbar.
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_data_in[i] = {
req_rw[i],
req_bank_addr[i],
req_byteen[i],
req_data[i],
req_tag[i]};
end
// Request crossbar: NUM_REQS request lanes in, NUM_BANKS banks out,
// steered by req_bank_idx.
VX_stream_xbar #(
.NUM_INPUTS (NUM_REQS),
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS),
.OUT_REG (3) // output should be registered for the data_store addressing
) req_xbar (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.collisions (perf_collisions),
`else
`UNUSED_PIN (collisions),
`endif
.valid_in (req_valid),
.data_in (req_data_in),
.sel_in (req_bank_idx),
.ready_in (req_ready),
.valid_out (per_bank_req_valid),
.data_out (req_data_out),
.sel_out (per_bank_req_idx),
.ready_out (per_bank_req_ready)
);
// Unpack the crossbar payload into the per-bank request fields
// (same field order as the packing above).
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign {
per_bank_req_rw[i],
per_bank_req_addr[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_tag[i]} = req_data_out[i];
end
// banks access
// Per-bank read response signals feeding the response crossbar.
wire [NUM_BANKS-1:0] per_bank_rsp_valid;
wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_rsp_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
// Bank storage: WORDS_PER_BANK words with a per-byte write enable.
// The read port is always enabled; a write happens only when a write
// request is valid and accepted on this bank.
VX_sp_ram #(
.DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE)
) data_store (
.clk (clk),
.read (1'b1),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]),
.rdata (per_bank_rsp_data[i])
);
// drop write response
// Only read requests are forwarded to bank_buf. A write request is always
// accepted (ready forced high when rw is set) because it bypasses bank_buf.
wire per_bank_req_valid_w, per_bank_req_ready_w;
assign per_bank_req_valid_w = per_bank_req_valid[i] && ~per_bank_req_rw[i];
assign per_bank_req_ready[i] = per_bank_req_ready_w || per_bank_req_rw[i];
// Carries the request-lane index and tag of a read from the request side
// to the response side of the bank.
// NOTE(review): SIZE=0 presumably selects a pass-through (unbuffered)
// configuration - confirm against VX_elastic_buffer.
VX_elastic_buffer #(
.DATAW (REQ_SEL_WIDTH + TAG_WIDTH),
.SIZE (0)
) bank_buf (
.clk (clk),
.reset (reset),
.valid_in (per_bank_req_valid_w),
.ready_in (per_bank_req_ready_w),
.data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}),
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}),
.valid_out (per_bank_rsp_valid[i]),
.ready_out (per_bank_rsp_ready[i])
);
end
// bank responses gather
// Packed response payloads (input side per bank, output side per request lane)
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out;
// Pack {read data, tag}; must match the unpacking after the crossbar.
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_data_in[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end
// Response crossbar: NUM_BANKS banks in, NUM_REQS request lanes out,
// steered by the request-lane index recorded for each read.
VX_stream_xbar #(
.NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.OUT_REG (2)
) rsp_xbar (
.clk (clk),
.reset (reset),
`UNUSED_PIN (collisions),
.sel_in (per_bank_rsp_idx),
.valid_in (per_bank_rsp_valid),
.ready_in (per_bank_rsp_ready),
.data_in (rsp_data_in),
.data_out (rsp_data_out),
.valid_out (rsp_valid),
.ready_out (rsp_ready),
`UNUSED_PIN (sel_out)
);
// Unpack the response payload onto the core response ports
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign {rsp_data[i], rsp_tag[i]} = rsp_data_out[i];
end
`ifdef PERF_ENABLE
// per cycle: reads, writes
// Number of request lanes that, in the current cycle, completed a read
// handshake, completed a write handshake, or held a valid response that the
// core did not accept.
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
// Per-lane event flags that are summed by POP_COUNT below
wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw;
wire [NUM_REQS-1:0] perf_writes_per_req = req_valid & req_ready & req_rw;
wire [NUM_REQS-1:0] perf_crsp_stall_per_req = rsp_valid & ~rsp_ready;
`POP_COUNT(perf_reads_per_cycle, perf_reads_per_req);
`POP_COUNT(perf_writes_per_cycle, perf_writes_per_req);
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
// Running totals, cleared by reset
reg [`PERF_CTR_BITS-1:0] perf_reads;
reg [`PERF_CTR_BITS-1:0] perf_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
// Accumulate the per-cycle counts on every clock edge
always @(posedge clk) begin
if (reset) begin
perf_reads <= '0;
perf_writes <= '0;
perf_crsp_stalls <= '0;
end else begin
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle);
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end
// Export through the cache_perf_t record. The miss, MSHR-stall and
// memory-stall fields are driven to zero by this module.
assign cache_perf.reads = perf_reads;
assign cache_perf.writes = perf_writes;
assign cache_perf.read_misses = '0;
assign cache_perf.write_misses = '0;
assign cache_perf.bank_stalls = perf_collisions;
assign cache_perf.mshr_stalls = '0;
assign cache_perf.mem_stalls = '0;
assign cache_perf.crsp_stalls = perf_crsp_stalls;
`endif
`ifdef DBG_TRACE_CACHE_BANK
// Debug tracing: extract the request UUID from the top UUID_WIDTH bits of
// each tag (0 when UUID_WIDTH is 0) and print request/response handshakes.
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid;
wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (UUID_WIDTH != 0) begin
assign req_uuid[i] = req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_uuid[i] = rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid[i] = 0;
assign rsp_uuid[i] = 0;
end
end
// Same UUID extraction for the per-bank request and response tags
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid;
wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (UUID_WIDTH != 0) begin
assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
assign per_bank_rsp_uuid[i] = per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign per_bank_req_uuid[i] = 0;
assign per_bank_rsp_uuid[i] = 0;
end
end
always @(posedge clk) begin
// Core-side events (trace level 1): one message per completed handshake
for (integer i = 0; i < NUM_REQS; ++i) begin
if (req_valid[i] && req_ready[i]) begin
if (req_rw[i]) begin
`TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_byteen[i], req_data[i], req_uuid[i]));
end else begin
`TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, req_addr[i], req_tag[i], req_uuid[i]));
end
end
if (rsp_valid[i] && rsp_ready[i]) begin
`TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, rsp_tag[i], rsp_data[i], rsp_uuid[i]));
end
end
// Bank-side events (trace level 2): one message per completed handshake
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin
if (per_bank_req_rw[i]) begin
`TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i]));
end else begin
`TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i]));
end
end
if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin
`TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i]));
end
end
end
`endif
endmodule