+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
596 lines
23 KiB
Systemverilog
596 lines
23 KiB
Systemverilog
// Copyright © 2019-2023
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
`include "VX_cache_define.vh"
|
|
|
|
// Cache top level: NUM_REQS core-side request ports are distributed over
// NUM_BANKS cache banks, which share a single memory-side port.
module VX_cache import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID   = "",

    // Number of word requests per cycle (one core_bus_if port per request)
    parameter NUM_REQS              = 4,

    // Cache size, in bytes
    parameter CACHE_SIZE            = 4096,
    // Size of a line inside a bank, in bytes
    parameter LINE_SIZE             = 64,
    // Number of banks (statically checked below: power of two, <= NUM_REQS)
    parameter NUM_BANKS             = 1,
    // Number of associative ways
    parameter NUM_WAYS              = 1,
    // Size of a word, in bytes
    parameter WORD_SIZE             = `XLEN/8,

    // Core response queue size
    parameter CRSQ_SIZE             = 2,
    // Miss reservation queue (MSHR) size
    parameter MSHR_SIZE             = 8,
    // Memory response queue size
    parameter MRSQ_SIZE             = 0,
    // Memory request queue size
    parameter MREQ_SIZE             = 4,

    // Non-zero makes the cache writeable
    parameter WRITE_ENABLE          = 1,

    // Request debug identifier width
    parameter UUID_WIDTH            = 0,

    // Core request tag width
    parameter TAG_WIDTH             = UUID_WIDTH + 1,

    // Core response output register
    parameter CORE_OUT_REG          = 0,

    // Memory request output register
    parameter MEM_OUT_REG           = 0
) (
    // Performance counters (only present when PERF_ENABLE is defined)
`ifdef PERF_ENABLE
    output cache_perf_t     cache_perf,
`endif

    input wire              clk,
    input wire              reset,

    // Core-side buses, one per request port
    VX_mem_bus_if.slave     core_bus_if [NUM_REQS],
    // Memory-side bus
    VX_mem_bus_if.master    mem_bus_if
);
|
|
|
|
// Parameter checks: the bank count may not exceed the number of request
// ports, and it must be a power of two.
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter"))
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))

// Word geometry
localparam WORD_WIDTH       = WORD_SIZE * 8;
localparam WORDS_PER_LINE   = LINE_SIZE / WORD_SIZE;
localparam WORD_SEL_BITS    = `CLOG2(WORDS_PER_LINE);
localparam WORD_SEL_WIDTH   = `UP(`CS_WORD_SEL_BITS);

// Bank and requester selection
localparam BANK_SEL_BITS    = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH   = `UP(BANK_SEL_BITS);
localparam REQ_SEL_WIDTH    = `UP(`CS_REQ_SEL_BITS);

// Memory-side tag: MSHR address plus bank-select bits
localparam MSHR_ADDR_WIDTH  = `LOG2UP(MSHR_SIZE);
localparam MEM_TAG_WIDTH    = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;

// Word address with the bank-select and word-select bits removed
localparam LINE_ADDR_WIDTH  = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);

// Widths of the packed payloads carried through the request/response crossbars
localparam CORE_REQ_DATAW   = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
localparam CORE_RSP_DATAW   = WORD_WIDTH + TAG_WIDTH;

// Output buffering is done at this level unless the configuration is
// single-bank (and, for the core side, also single-request).
localparam CORE_REQ_BUF_ENABLE = !((NUM_BANKS == 1) && (NUM_REQS == 1));
localparam MEM_REQ_BUF_ENABLE  = (NUM_BANKS != 1);
|
|
|
|
`ifdef PERF_ENABLE
// Per-bank performance event bits, connected to each bank's perf_* ports
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
`endif

// Packed-array view of the core request channels
wire [NUM_REQS-1:0]                           core_req_valid;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0]  core_req_addr;
wire [NUM_REQS-1:0]                           core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0]            core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0]       core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0]            core_req_tag;
wire [NUM_REQS-1:0]                           core_req_ready;

for (genvar r = 0; r < NUM_REQS; ++r) begin
    // Handshake
    assign core_req_valid[r]        = core_bus_if[r].req_valid;
    assign core_bus_if[r].req_ready = core_req_ready[r];
    // Payload
    assign core_req_rw[r]           = core_bus_if[r].req_data.rw;
    assign core_req_addr[r]         = core_bus_if[r].req_data.addr;
    assign core_req_byteen[r]       = core_bus_if[r].req_data.byteen;
    assign core_req_data[r]         = core_bus_if[r].req_data.data;
    assign core_req_tag[r]          = core_bus_if[r].req_data.tag;
end
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
// Core response buffering: one elastic buffer per request port sits between
// the response crossbar outputs (*_s signals) and core_bus_if.
wire [NUM_REQS-1:0]                      core_rsp_valid_s;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0]  core_rsp_data_s;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0]       core_rsp_tag_s;
wire [NUM_REQS-1:0]                      core_rsp_ready_s;

for (genvar r = 0; r < NUM_REQS; ++r) begin

    `RESET_RELAY (core_rsp_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (`CS_WORD_WIDTH + TAG_WIDTH),
        // Size 0 when buffering is left to the bank (CORE_REQ_BUF_ENABLE == 0)
        .SIZE    (CORE_REQ_BUF_ENABLE ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
        .OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
    ) core_rsp_buf (
        .clk       (clk),
        .reset     (core_rsp_reset),
        // Input side: from the response crossbar
        .valid_in  (core_rsp_valid_s[r]),
        .data_in   ({core_rsp_data_s[r], core_rsp_tag_s[r]}),
        .ready_in  (core_rsp_ready_s[r]),
        // Output side: to the core bus
        .valid_out (core_bus_if[r].rsp_valid),
        .data_out  ({core_bus_if[r].rsp_data.data, core_bus_if[r].rsp_data.tag}),
        .ready_out (core_bus_if[r].rsp_ready)
    );
end
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
// Memory request buffering: a single elastic buffer between the line-wide
// request signals (*_s) and mem_bus_if.
wire                            mem_req_valid_s;
wire [`CS_MEM_ADDR_WIDTH-1:0]   mem_req_addr_s;
wire                            mem_req_rw_s;
wire [LINE_SIZE-1:0]            mem_req_byteen_s;
wire [`CS_LINE_WIDTH-1:0]       mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0]        mem_req_tag_s;
wire                            mem_req_ready_s;

`RESET_RELAY (mem_req_buf_reset, reset);

VX_elastic_buffer #(
    .DATAW   (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
    // Size 0 when buffering is left to the bank (MEM_REQ_BUF_ENABLE == 0)
    .SIZE    (MEM_REQ_BUF_ENABLE ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
    .OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
) mem_req_buf (
    .clk       (clk),
    .reset     (mem_req_buf_reset),
    // Input side
    .valid_in  (mem_req_valid_s),
    .data_in   ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
    .ready_in  (mem_req_ready_s),
    // Output side: field order mirrors data_in
    .valid_out (mem_bus_if.req_valid),
    .data_out  ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
    .ready_out (mem_bus_if.req_ready)
);
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
// Memory response buffering: responses from mem_bus_if pass through an
// elastic buffer of MRSQ_SIZE entries before reaching the banks.
wire                        mem_rsp_valid_s;
wire [`CS_LINE_WIDTH-1:0]   mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0]    mem_rsp_tag_s;
wire                        mem_rsp_ready_s;

`RESET_RELAY (mem_rsp_reset, reset);

VX_elastic_buffer #(
    .DATAW   (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
    .SIZE    (MRSQ_SIZE),
    // Output register is requested only for queues larger than two entries
    .OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
    .clk       (clk),
    .reset     (mem_rsp_reset),
    // Input side: from the memory bus
    .valid_in  (mem_bus_if.rsp_valid),
    .data_in   ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
    .ready_in  (mem_bus_if.rsp_ready),
    // Output side: to the banks
    .valid_out (mem_rsp_valid_s),
    .data_out  ({mem_rsp_tag_s, mem_rsp_data_s}),
    .ready_out (mem_rsp_ready_s)
);
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
// Initialization sequencer: its outputs are broadcast to every bank's
// init_enable / init_line_sel ports.
wire                            init_enable;
wire [`CS_LINE_SEL_BITS-1:0]    init_line_sel;

`RESET_RELAY (init_reset, reset);

VX_cache_init #(
    .CACHE_SIZE (CACHE_SIZE),
    .LINE_SIZE  (LINE_SIZE),
    .NUM_BANKS  (NUM_BANKS),
    .NUM_WAYS   (NUM_WAYS)
) cache_init (
    .clk       (clk),
    .reset     (init_reset),
    .valid_out (init_enable),
    .addr_out  (init_line_sel)
);
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
// Per-bank core request channel (outputs of the request crossbar)
wire [NUM_BANKS-1:0]                            per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0]   per_bank_core_req_addr;
wire [NUM_BANKS-1:0]                            per_bank_core_req_rw;
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0]        per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0]             per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0]        per_bank_core_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0]             per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0]         per_bank_core_req_idx;
wire [NUM_BANKS-1:0]                            per_bank_core_req_ready;

// Per-bank core response channel (inputs of the response crossbar)
wire [NUM_BANKS-1:0]                            per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0]        per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0]             per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0]         per_bank_core_rsp_idx;
wire [NUM_BANKS-1:0]                            per_bank_core_rsp_ready;

// Per-bank memory request channel (inputs of the memory request arbiter)
wire [NUM_BANKS-1:0]                            per_bank_mem_req_valid;
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0]    per_bank_mem_req_addr;
wire [NUM_BANKS-1:0]                            per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0]        per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0]             per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0]        per_bank_mem_req_data;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0]       per_bank_mem_req_id;
wire [NUM_BANKS-1:0]                            per_bank_mem_req_ready;

// Per-bank memory response ready
wire [NUM_BANKS-1:0]                            per_bank_mem_rsp_ready;

// The buffered memory response is consumed by the bank named in its tag;
// with a single bank there is nothing to select.
if (NUM_BANKS != 1) begin
    assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)];
end else begin
    assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
end
|
|
|
|
// Bank requests dispatch: split each word address into word-select,
// bank-select and line-address fields, then pack the request payload
// for the request crossbar.

wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0]   core_req_data_in;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0]  core_req_data_out;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0]  core_req_line_addr;
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0]   core_req_bid;
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0]   core_req_wsel;

for (genvar r = 0; r < NUM_REQS; ++r) begin
    // Word select: lowest address bits, or zero when a line holds one word
    if (WORDS_PER_LINE > 1) begin
        assign core_req_wsel[r] = core_req_addr[r][0 +: WORD_SEL_BITS];
    end else begin
        assign core_req_wsel[r] = '0;
    end
    // Line address: bits above the word-select and bank-select fields
    assign core_req_line_addr[r] = core_req_addr[r][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH];
end

// Bank select: address bits just above the word-select field
if (NUM_BANKS > 1) begin
    for (genvar r = 0; r < NUM_REQS; ++r) begin
        assign core_req_bid[r] = core_req_addr[r][WORD_SEL_BITS +: BANK_SEL_BITS];
    end
end else begin
    assign core_req_bid = '0;
end

// Payload layout (MSB to LSB): line address, rw, word select, byte enable, data, tag
for (genvar r = 0; r < NUM_REQS; ++r) begin
    assign core_req_data_in[r] = {core_req_line_addr[r], core_req_rw[r], core_req_wsel[r],
                                  core_req_byteen[r], core_req_data[r], core_req_tag[r]};
end
|
|
|
|
`ifdef PERF_ENABLE
// Collision count reported by the request crossbar
wire [`PERF_CTR_BITS-1:0] perf_collisions;
`endif

`RESET_RELAY (req_xbar_reset, reset);

// Request crossbar: routes each of the NUM_REQS requests to the bank chosen
// by core_req_bid; sel_out gives each bank the index of the request port.
VX_stream_xbar #(
    .NUM_INPUTS    (NUM_REQS),
    .NUM_OUTPUTS   (NUM_BANKS),
    .DATAW         (CORE_REQ_DATAW),
    .PERF_CTR_BITS (`PERF_CTR_BITS)
) req_xbar (
    .clk        (clk),
    .reset      (req_xbar_reset),
`ifdef PERF_ENABLE
    .collisions (perf_collisions),
`else
    `UNUSED_PIN (collisions),
`endif
    // Request-port side
    .valid_in   (core_req_valid),
    .data_in    (core_req_data_in),
    .sel_in     (core_req_bid),
    .ready_in   (core_req_ready),
    // Bank side
    .valid_out  (per_bank_core_req_valid),
    .data_out   (core_req_data_out),
    .sel_out    (per_bank_core_req_idx),
    .ready_out  (per_bank_core_req_ready)
);

// Unpack the crossbar payload; field order matches core_req_data_in
for (genvar b = 0; b < NUM_BANKS; ++b) begin
    assign {per_bank_core_req_addr[b], per_bank_core_req_rw[b], per_bank_core_req_wsel[b],
            per_bank_core_req_byteen[b], per_bank_core_req_data[b], per_bank_core_req_tag[b]} = core_req_data_out[b];
end
|
|
|
|
// Banks access: one VX_cache_bank instance per bank
for (genvar b = 0; b < NUM_BANKS; ++b) begin
    wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
    wire curr_bank_mem_rsp_valid;

    // A memory response is valid for this bank only when its tag names it
    if (NUM_BANKS != 1) begin
        assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == b);
    end else begin
        assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
    end

    `RESET_RELAY (bank_reset, reset);

    VX_cache_bank #(
        .BANK_ID      (b),
        .INSTANCE_ID  (INSTANCE_ID),
        .CACHE_SIZE   (CACHE_SIZE),
        .LINE_SIZE    (LINE_SIZE),
        .NUM_BANKS    (NUM_BANKS),
        .NUM_WAYS     (NUM_WAYS),
        .WORD_SIZE    (WORD_SIZE),
        .NUM_REQS     (NUM_REQS),
        .CRSQ_SIZE    (CRSQ_SIZE),
        .MSHR_SIZE    (MSHR_SIZE),
        .MREQ_SIZE    (MREQ_SIZE),
        .WRITE_ENABLE (WRITE_ENABLE),
        .UUID_WIDTH   (UUID_WIDTH),
        .TAG_WIDTH    (TAG_WIDTH),
        // The bank's own output registers are used only when this level
        // does not insert the corresponding buffer
        .CORE_OUT_REG (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_REG),
        .MEM_OUT_REG  (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_REG)
    ) bank (
        .clk                (clk),
        .reset              (bank_reset),

    `ifdef PERF_ENABLE
        .perf_read_misses   (perf_read_miss_per_bank[b]),
        .perf_write_misses  (perf_write_miss_per_bank[b]),
        .perf_mshr_stalls   (perf_mshr_stall_per_bank[b]),
    `endif

        // Core request
        .core_req_valid     (per_bank_core_req_valid[b]),
        .core_req_addr      (per_bank_core_req_addr[b]),
        .core_req_rw        (per_bank_core_req_rw[b]),
        .core_req_wsel      (per_bank_core_req_wsel[b]),
        .core_req_byteen    (per_bank_core_req_byteen[b]),
        .core_req_data      (per_bank_core_req_data[b]),
        .core_req_tag       (per_bank_core_req_tag[b]),
        .core_req_idx       (per_bank_core_req_idx[b]),
        .core_req_ready     (per_bank_core_req_ready[b]),

        // Core response
        .core_rsp_valid     (per_bank_core_rsp_valid[b]),
        .core_rsp_data      (per_bank_core_rsp_data[b]),
        .core_rsp_tag       (per_bank_core_rsp_tag[b]),
        .core_rsp_idx       (per_bank_core_rsp_idx[b]),
        .core_rsp_ready     (per_bank_core_rsp_ready[b]),

        // Memory request
        .mem_req_valid      (per_bank_mem_req_valid[b]),
        .mem_req_addr       (curr_bank_mem_req_addr),
        .mem_req_rw         (per_bank_mem_req_rw[b]),
        .mem_req_wsel       (per_bank_mem_req_wsel[b]),
        .mem_req_byteen     (per_bank_mem_req_byteen[b]),
        .mem_req_data       (per_bank_mem_req_data[b]),
        .mem_req_id         (per_bank_mem_req_id[b]),
        .mem_req_ready      (per_bank_mem_req_ready[b]),

        // Memory response (data and request id are shared by all banks)
        .mem_rsp_valid      (curr_bank_mem_rsp_valid),
        .mem_rsp_data       (mem_rsp_data_s),
        .mem_rsp_id         (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
        .mem_rsp_ready      (per_bank_mem_rsp_ready[b]),

        // Initialization
        .init_enable        (init_enable),
        .init_line_sel      (init_line_sel)
    );

    // Convert the bank's line address into a memory address; with several
    // banks the bank index is folded back in by CS_LINE_TO_MEM_ADDR
    if (NUM_BANKS != 1) begin
        assign per_bank_mem_req_addr[b] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, b);
    end else begin
        assign per_bank_mem_req_addr[b] = curr_bank_mem_req_addr;
    end
end
|
|
|
|
// Bank responses gather: pack each bank's response, route it back to the
// request port given by per_bank_core_rsp_idx, then unpack per port.

wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in;
wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0]  core_rsp_data_out;

for (genvar b = 0; b < NUM_BANKS; ++b) begin
    assign core_rsp_data_in[b] = {per_bank_core_rsp_data[b], per_bank_core_rsp_tag[b]};
end

`RESET_RELAY (rsp_xbar_reset, reset);

VX_stream_xbar #(
    .NUM_INPUTS  (NUM_BANKS),
    .NUM_OUTPUTS (NUM_REQS),
    .DATAW       (CORE_RSP_DATAW)
) rsp_xbar (
    .clk        (clk),
    .reset      (rsp_xbar_reset),
    `UNUSED_PIN (collisions),
    // Bank side
    .valid_in   (per_bank_core_rsp_valid),
    .data_in    (core_rsp_data_in),
    .sel_in     (per_bank_core_rsp_idx),
    .ready_in   (per_bank_core_rsp_ready),
    // Request-port side
    .valid_out  (core_rsp_valid_s),
    .data_out   (core_rsp_data_out),
    .ready_out  (core_rsp_ready_s),
    `UNUSED_PIN (sel_out)
);

for (genvar r = 0; r < NUM_REQS; ++r) begin
    assign {core_rsp_data_s[r], core_rsp_tag_s[r]} = core_rsp_data_out[r];
end
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
// Arbitrated (word-wide) memory request, before expansion to a full line
wire                            mem_req_valid_p;
wire [`CS_MEM_ADDR_WIDTH-1:0]   mem_req_addr_p;
wire                            mem_req_rw_p;
wire [WORD_SEL_WIDTH-1:0]       mem_req_wsel_p;
wire [WORD_SIZE-1:0]            mem_req_byteen_p;
wire [`CS_WORD_WIDTH-1:0]       mem_req_data_p;
wire [MEM_TAG_WIDTH-1:0]        mem_req_tag_p;
wire [MSHR_ADDR_WIDTH-1:0]      mem_req_id_p;
wire                            mem_req_ready_p;

// Memory request arbitration

// Payload layout (MSB to LSB): address, rw, word select, byte enable, data, MSHR id
localparam MEM_ARB_DATAW = `CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH;

wire [NUM_BANKS-1:0][MEM_ARB_DATAW-1:0] mem_arb_data_in;

for (genvar b = 0; b < NUM_BANKS; ++b) begin
    assign mem_arb_data_in[b] = {per_bank_mem_req_addr[b], per_bank_mem_req_rw[b], per_bank_mem_req_wsel[b],
                                 per_bank_mem_req_byteen[b], per_bank_mem_req_data[b], per_bank_mem_req_id[b]};
end

`RESET_RELAY (mem_req_arb_reset, reset);

VX_stream_arb #(
    .NUM_INPUTS (NUM_BANKS),
    .DATAW      (MEM_ARB_DATAW),
    .ARBITER    ("R")
) mem_req_arb (
    .clk        (clk),
    .reset      (mem_req_arb_reset),
    // Bank side
    .valid_in   (per_bank_mem_req_valid),
    .data_in    (mem_arb_data_in),
    .ready_in   (per_bank_mem_req_ready),
    // Selected request
    .valid_out  (mem_req_valid_p),
    .data_out   ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
    .ready_out  (mem_req_ready_p),
    `UNUSED_PIN (sel_out)
);

// Memory tag: the MSHR id, prefixed with the bank id (recovered from the
// request address) when there is more than one bank
if (NUM_BANKS > 1) begin
    wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
    assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
end else begin
    assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
end
|
|
|
|
// Memory request multi-port handling: forward the arbitrated request to the
// memory request buffer, widening the word-sized write payload to a line.

assign mem_req_valid_s = mem_req_valid_p;
assign mem_req_ready_p = mem_req_ready_s;
assign mem_req_addr_s  = mem_req_addr_p;
assign mem_req_tag_s   = mem_req_tag_p;

if (WRITE_ENABLE == 0) begin
    // Read-only cache: the write-related fields of the arbitrated request are ignored
    `UNUSED_VAR (mem_req_byteen_p)
    `UNUSED_VAR (mem_req_wsel_p)
    `UNUSED_VAR (mem_req_data_p)
    `UNUSED_VAR (mem_req_rw_p)

    assign mem_req_rw_s     = 0;
    assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
    assign mem_req_data_s   = '0;
end else if (`CS_WORDS_PER_LINE > 1) begin
    // Writeable cache, several words per line: place the word and its byte
    // enables at the slot chosen by the word select
    reg [LINE_SIZE-1:0]      line_byteen;
    reg [`CS_LINE_WIDTH-1:0] line_data;

    always @(*) begin
        // Defaults first: no byte enabled, data don't-care
        line_byteen = '0;
        line_data   = 'x;
        // Then overwrite the selected word slot
        line_byteen[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
        line_data[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
    end

    assign mem_req_rw_s     = mem_req_rw_p;
    assign mem_req_byteen_s = line_byteen;
    assign mem_req_data_s   = line_data;
end else begin
    // Writeable cache, one word per line: the word already is the line
    `UNUSED_VAR (mem_req_wsel_p)

    assign mem_req_rw_s     = mem_req_rw_p;
    assign mem_req_byteen_s = mem_req_byteen_p;
    assign mem_req_data_s   = mem_req_data_p;
end
|
|
|
|
`ifdef PERF_ENABLE
// Performance counters: per-cycle event counts are obtained by pop-counting
// event bit vectors and accumulated into PERF_CTR_BITS-wide registers.

// per cycle: core reads, core writes (one event bit per request port)
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;

// A request is counted in the cycle its valid/ready handshake completes
wire [NUM_REQS-1:0] perf_core_reads_per_req  = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_req = core_req_valid & core_req_ready & core_req_rw;

// per cycle: read misses, write misses, mshr stalls (one event bit per bank)
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;

// per cycle: core response stalls. This count is taken over the NUM_REQS
// response ports, so it must be sized by NUM_REQS (not NUM_BANKS); sizing it
// by NUM_BANKS truncates the count whenever NUM_BANKS < NUM_REQS.
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;

`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);

// A core response stall is a response held valid while the core is not ready
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
for (genvar i = 0; i < NUM_REQS; ++i) begin
    assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
end

`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);

// A memory stall is a memory request held valid while memory is not ready
wire perf_mem_stall_per_cycle = mem_bus_if.req_valid && ~mem_bus_if.req_ready;

reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;

// Accumulators: cleared on reset, otherwise incremented by the per-cycle counts
always @(posedge clk) begin
    if (reset) begin
        perf_core_reads   <= '0;
        perf_core_writes  <= '0;
        perf_read_misses  <= '0;
        perf_write_misses <= '0;
        perf_mshr_stalls  <= '0;
        perf_mem_stalls   <= '0;
        perf_crsp_stalls  <= '0;
    end else begin
        perf_core_reads   <= perf_core_reads   + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
        perf_core_writes  <= perf_core_writes  + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
        perf_read_misses  <= perf_read_misses  + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
        perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
        perf_mshr_stalls  <= perf_mshr_stalls  + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
        perf_mem_stalls   <= perf_mem_stalls   + `PERF_CTR_BITS'(perf_mem_stall_per_cycle);
        perf_crsp_stalls  <= perf_crsp_stalls  + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
    end
end

assign cache_perf.reads        = perf_core_reads;
assign cache_perf.writes       = perf_core_writes;
assign cache_perf.read_misses  = perf_read_misses;
assign cache_perf.write_misses = perf_write_misses;
// Bank stalls are the collisions counted by the request crossbar
assign cache_perf.bank_stalls  = perf_collisions;
assign cache_perf.mshr_stalls  = perf_mshr_stalls;
assign cache_perf.mem_stalls   = perf_mem_stalls;
assign cache_perf.crsp_stalls  = perf_crsp_stalls;
`endif

endmodule
|