cache bindings and memory perf refactory

This commit is contained in:
Blaise Tine
2023-11-03 08:18:18 -04:00
parent 69f9ae778d
commit c9e6518e05
20 changed files with 746 additions and 1025 deletions

View File

@@ -13,106 +13,31 @@
`include "VX_define.vh"
interface VX_mem_perf_if ();
interface VX_mem_perf_if import VX_gpu_pkg::*; ();
wire [`PERF_CTR_BITS-1:0] icache_reads;
wire [`PERF_CTR_BITS-1:0] icache_read_misses;
wire [`PERF_CTR_BITS-1:0] dcache_reads;
wire [`PERF_CTR_BITS-1:0] dcache_writes;
wire [`PERF_CTR_BITS-1:0] dcache_read_misses;
wire [`PERF_CTR_BITS-1:0] dcache_write_misses;
wire [`PERF_CTR_BITS-1:0] dcache_bank_stalls;
wire [`PERF_CTR_BITS-1:0] dcache_mshr_stalls;
wire [`PERF_CTR_BITS-1:0] smem_reads;
wire [`PERF_CTR_BITS-1:0] smem_writes;
wire [`PERF_CTR_BITS-1:0] smem_bank_stalls;
wire [`PERF_CTR_BITS-1:0] l2cache_reads;
wire [`PERF_CTR_BITS-1:0] l2cache_writes;
wire [`PERF_CTR_BITS-1:0] l2cache_read_misses;
wire [`PERF_CTR_BITS-1:0] l2cache_write_misses;
wire [`PERF_CTR_BITS-1:0] l2cache_bank_stalls;
wire [`PERF_CTR_BITS-1:0] l2cache_mshr_stalls;
wire [`PERF_CTR_BITS-1:0] l3cache_reads;
wire [`PERF_CTR_BITS-1:0] l3cache_writes;
wire [`PERF_CTR_BITS-1:0] l3cache_read_misses;
wire [`PERF_CTR_BITS-1:0] l3cache_write_misses;
wire [`PERF_CTR_BITS-1:0] l3cache_bank_stalls;
wire [`PERF_CTR_BITS-1:0] l3cache_mshr_stalls;
wire [`PERF_CTR_BITS-1:0] mem_reads;
wire [`PERF_CTR_BITS-1:0] mem_writes;
wire [`PERF_CTR_BITS-1:0] mem_latency;
cache_perf_t icache;
cache_perf_t dcache;
cache_perf_t l2cache;
cache_perf_t l3cache;
cache_perf_t smem;
mem_perf_t mem;
modport master (
output icache_reads,
output icache_read_misses,
output dcache_reads,
output dcache_writes,
output dcache_read_misses,
output dcache_write_misses,
output dcache_bank_stalls,
output dcache_mshr_stalls,
output smem_reads,
output smem_writes,
output smem_bank_stalls,
output l2cache_reads,
output l2cache_writes,
output l2cache_read_misses,
output l2cache_write_misses,
output l2cache_bank_stalls,
output l2cache_mshr_stalls,
output l3cache_reads,
output l3cache_writes,
output l3cache_read_misses,
output l3cache_write_misses,
output l3cache_bank_stalls,
output l3cache_mshr_stalls,
output mem_reads,
output mem_writes,
output mem_latency
output icache,
output dcache,
output l2cache,
output l3cache,
output smem,
output mem
);
modport slave (
input icache_reads,
input icache_read_misses,
input dcache_reads,
input dcache_writes,
input dcache_read_misses,
input dcache_write_misses,
input dcache_bank_stalls,
input dcache_mshr_stalls,
input smem_reads,
input smem_writes,
input smem_bank_stalls,
input l2cache_reads,
input l2cache_writes,
input l2cache_read_misses,
input l2cache_write_misses,
input l2cache_bank_stalls,
input l2cache_mshr_stalls,
input l3cache_reads,
input l3cache_writes,
input l3cache_read_misses,
input l3cache_write_misses,
input l3cache_bank_stalls,
input l3cache_mshr_stalls,
input mem_reads,
input mem_writes,
input mem_latency
input icache,
input dcache,
input l2cache,
input l3cache,
input smem,
input mem
);
endinterface

View File

@@ -1,209 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
`define SMEM_ADDR_STACK_OPT
module VX_mem_unit import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_mem_perf_if.master mem_perf_if,
`endif
VX_mem_bus_if.slave icache_bus_if [`NUM_SOCKETS],
VX_mem_bus_if.slave dcache_bus_if [`NUM_SOCKETS * DCACHE_NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
`ifdef PERF_ENABLE
VX_cache_perf_if perf_icache_if();
VX_cache_perf_if perf_dcache_if();
VX_cache_perf_if perf_l2cache_if();
`endif
/////////////////////////////// I-Cache ///////////////////////////////////
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_TAG_WIDTH)
) icache_mem_bus_if();
`RESET_RELAY (icache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("cluster%0d-icache", CLUSTER_ID)),
.NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`NUM_SOCKETS),
.TAG_SEL_IDX (0),
.CACHE_SIZE (`ICACHE_SIZE),
.LINE_SIZE (ICACHE_LINE_SIZE),
.NUM_BANKS (1),
.NUM_WAYS (`ICACHE_NUM_WAYS),
.WORD_SIZE (ICACHE_WORD_SIZE),
.NUM_REQS (1),
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.TAG_WIDTH (ICACHE_ARB_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (0),
.CORE_OUT_REG (2),
.MEM_OUT_REG (2)
) icache (
`ifdef PERF_ENABLE
.cache_perf_if (perf_icache_if),
`endif
.clk (clk),
.reset (icache_reset),
.core_bus_if (icache_bus_if),
.mem_bus_if (icache_mem_bus_if)
);
/////////////////////////////// D-Cache ///////////////////////////////////
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH)
) dcache_mem_bus_if();
`RESET_RELAY (dcache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("cluster%0d-dcache", CLUSTER_ID)),
.NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`NUM_SOCKETS),
.TAG_SEL_IDX (1),
.CACHE_SIZE (`DCACHE_SIZE),
.LINE_SIZE (DCACHE_LINE_SIZE),
.NUM_BANKS (`DCACHE_NUM_BANKS),
.NUM_WAYS (`DCACHE_NUM_WAYS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.NUM_REQS (DCACHE_NUM_REQS),
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1),
.NC_ENABLE (1),
.CORE_OUT_REG (`SM_ENABLED ? 2 : 1),
.MEM_OUT_REG (2)
) dcache (
`ifdef PERF_ENABLE
.cache_perf_if (perf_dcache_if),
`endif
.clk (clk),
.reset (dcache_reset),
.core_bus_if (dcache_bus_if),
.mem_bus_if (dcache_mem_bus_if)
);
/////////////////////////////// L2-Cache //////////////////////////////////
VX_mem_bus_if #(
.DATA_SIZE (L2_WORD_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH)
) l2_mem_bus_if[L2_NUM_REQS]();
localparam I_MEM_ARB_IDX = 0;
localparam D_MEM_ARB_IDX = I_MEM_ARB_IDX + 1;
`ASSIGN_VX_MEM_BUS_IF_X (l2_mem_bus_if[I_MEM_ARB_IDX], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
`ASSIGN_VX_MEM_BUS_IF_X (l2_mem_bus_if[D_MEM_ARB_IDX], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("cluster%0d-l2cache", CLUSTER_ID)),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_WAYS (`L2_NUM_WAYS),
.WORD_SIZE (L2_WORD_SIZE),
.NUM_REQS (L2_NUM_REQS),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_REG (2),
.MEM_OUT_REG (2),
.NC_ENABLE (1),
.PASSTHRU (!`L2_ENABLED)
) l2cache (
.clk (clk),
.reset (l2_reset),
`ifdef PERF_ENABLE
.cache_perf_if (perf_l2cache_if),
`endif
.core_bus_if (l2_mem_bus_if),
.mem_bus_if (mem_bus_if)
);
`ifdef PERF_ENABLE
`UNUSED_VAR (perf_dcache_if.mem_stalls)
`UNUSED_VAR (perf_dcache_if.crsp_stalls)
assign mem_perf_if.icache_reads = perf_icache_if.reads;
assign mem_perf_if.icache_read_misses = perf_icache_if.read_misses;
assign mem_perf_if.dcache_reads = perf_dcache_if.reads;
assign mem_perf_if.dcache_writes = perf_dcache_if.writes;
assign mem_perf_if.dcache_read_misses = perf_dcache_if.read_misses;
assign mem_perf_if.dcache_write_misses= perf_dcache_if.write_misses;
assign mem_perf_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
assign mem_perf_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
`ifdef L2_ENABLE
assign mem_perf_if.l2cache_reads = perf_l2cache_if.reads;
assign mem_perf_if.l2cache_writes = perf_l2cache_if.writes;
assign mem_perf_if.l2cache_read_misses = perf_l2cache_if.read_misses;
assign mem_perf_if.l2cache_write_misses= perf_l2cache_if.write_misses;
assign mem_perf_if.l2cache_bank_stalls = perf_l2cache_if.bank_stalls;
assign mem_perf_if.l2cache_mshr_stalls = perf_l2cache_if.mshr_stalls;
`else
assign mem_perf_if.l2cache_reads = '0;
assign mem_perf_if.l2cache_writes = '0;
assign mem_perf_if.l2cache_read_misses = '0;
assign mem_perf_if.l2cache_write_misses= '0;
assign mem_perf_if.l2cache_bank_stalls = '0;
assign mem_perf_if.l2cache_mshr_stalls = '0;
`endif
assign mem_perf_if.l3cache_reads = '0;
assign mem_perf_if.l3cache_writes = '0;
assign mem_perf_if.l3cache_read_misses = '0;
assign mem_perf_if.l3cache_write_misses= '0;
assign mem_perf_if.l3cache_bank_stalls = '0;
assign mem_perf_if.l3cache_mshr_stalls = '0;
assign mem_perf_if.mem_reads = '0;
assign mem_perf_if.mem_writes = '0;
assign mem_perf_if.mem_latency = '0;
`endif
endmodule

View File

@@ -13,7 +13,7 @@
`include "VX_define.vh"
module VX_shared_mem #(
module VX_shared_mem import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
// Size of cache in bytes
@@ -40,7 +40,7 @@ module VX_shared_mem #(
// PERF
`ifdef PERF_ENABLE
VX_cache_perf_if.master cache_perf_if,
output cache_perf_t cache_perf,
`endif
// Core request
@@ -106,6 +106,10 @@ module VX_shared_mem #(
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] req_data_out;
`ifdef PERF_ENABLE
wire [`PERF_CTR_BITS-1:0] perf_collisions;
`endif
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_data_in[i] = {
req_rw[i],
@@ -125,7 +129,7 @@ module VX_shared_mem #(
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.collisions (cache_perf_if.bank_stalls),
.collisions (perf_collisions),
`else
`UNUSED_PIN (collisions),
`endif
@@ -253,13 +257,14 @@ module VX_shared_mem #(
end
end
assign cache_perf_if.reads = perf_reads;
assign cache_perf_if.writes = perf_writes;
assign cache_perf_if.read_misses = '0;
assign cache_perf_if.write_misses = '0;
assign cache_perf_if.mshr_stalls = '0;
assign cache_perf_if.mem_stalls = '0;
assign cache_perf_if.crsp_stalls = perf_crsp_stalls;
assign cache_perf.reads = perf_reads;
assign cache_perf.writes = perf_writes;
assign cache_perf.read_misses = '0;
assign cache_perf.write_misses = '0;
assign cache_perf.bank_stalls = perf_collisions;
assign cache_perf.mshr_stalls = '0;
assign cache_perf.mem_stalls = '0;
assign cache_perf.crsp_stalls = perf_crsp_stalls;
`endif