cache bindings and memory perf refactory
This commit is contained in:
216
hw/rtl/Vortex.sv
216
hw/rtl/Vortex.sv
@@ -45,90 +45,28 @@ module Vortex import VX_gpu_pkg::*; (
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if[`NUM_CLUSTERS]();
|
||||
VX_mem_perf_if perf_memsys_total_if();
|
||||
VX_cache_perf_if perf_l3cache_if();
|
||||
`endif
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
cache_perf_t perf_l3cache;
|
||||
mem_perf_t mem_perf;
|
||||
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.l2cache = 'x;
|
||||
assign mem_perf_if.l3cache = perf_l3cache;
|
||||
assign mem_perf_if.smem = 'x;
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L2_LINE_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
|
||||
) per_cluster_mem_bus_if[`NUM_CLUSTERS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L3_LINE_SIZE),
|
||||
.TAG_WIDTH (L3_MEM_TAG_WIDTH)
|
||||
) mem_bus_if();
|
||||
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen= mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire sim_ebreak /* verilator public */;
|
||||
wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value /* verilator public */;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
|
||||
wire [`NUM_CLUSTERS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_cluster_sim_wb_value;
|
||||
assign sim_ebreak = per_cluster_sim_ebreak[0];
|
||||
assign sim_wb_value = per_cluster_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_cluster_sim_ebreak)
|
||||
`UNUSED_VAR (per_cluster_sim_wb_value)
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L2_LINE_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
|
||||
) per_cluster_mem_bus_if[`NUM_CLUSTERS]();
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
assign dcr_bus_if.write_data = dcr_wr_data;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if[i]),
|
||||
.perf_memsys_total_if (perf_memsys_total_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
|
||||
.sim_ebreak (per_cluster_sim_ebreak[i]),
|
||||
.sim_wb_value (per_cluster_sim_wb_value[i]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_BUSY (busy, (| per_cluster_busy), (`NUM_CLUSTERS > 1));
|
||||
|
||||
`RESET_RELAY (l3_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
@@ -155,49 +93,83 @@ module Vortex import VX_gpu_pkg::*; (
|
||||
.reset (l3_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf_if (perf_l3cache_if),
|
||||
.cache_perf (perf_l3cache),
|
||||
`endif
|
||||
|
||||
.core_bus_if (per_cluster_mem_bus_if),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen= mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire sim_ebreak /* verilator public */;
|
||||
wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value /* verilator public */;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
|
||||
wire [`NUM_CLUSTERS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_cluster_sim_wb_value;
|
||||
assign sim_ebreak = per_cluster_sim_ebreak[0];
|
||||
assign sim_wb_value = per_cluster_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_cluster_sim_ebreak)
|
||||
`UNUSED_VAR (per_cluster_sim_wb_value)
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
assign dcr_bus_if.write_data = dcr_wr_data;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
|
||||
.sim_ebreak (per_cluster_sim_ebreak[i]),
|
||||
.sim_wb_value (per_cluster_sim_wb_value[i]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_BUSY (busy, (| per_cluster_busy), (`NUM_CLUSTERS > 1));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, icache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, icache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_write_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_mshr_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_write_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_mshr_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
assign perf_memsys_total_if.l3cache_reads = perf_l3cache_if.reads;
|
||||
assign perf_memsys_total_if.l3cache_writes = perf_l3cache_if.writes;
|
||||
assign perf_memsys_total_if.l3cache_read_misses = perf_l3cache_if.read_misses;
|
||||
assign perf_memsys_total_if.l3cache_write_misses= perf_l3cache_if.write_misses;
|
||||
assign perf_memsys_total_if.l3cache_bank_stalls = perf_l3cache_if.bank_stalls;
|
||||
assign perf_memsys_total_if.l3cache_mshr_stalls = perf_l3cache_if.mshr_stalls;
|
||||
`else
|
||||
assign perf_memsys_total_if.l3cache_reads = '0;
|
||||
assign perf_memsys_total_if.l3cache_writes = '0;
|
||||
assign perf_memsys_total_if.l3cache_read_misses = '0;
|
||||
assign perf_memsys_total_if.l3cache_write_misses= '0;
|
||||
assign perf_memsys_total_if.l3cache_bank_stalls = '0;
|
||||
assign perf_memsys_total_if.l3cache_mshr_stalls = '0;
|
||||
`endif
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -208,30 +180,20 @@ module Vortex import VX_gpu_pkg::*; (
|
||||
`PERF_CTR_BITS'($signed(2'(mem_req_fire && ~mem_bus_if.req_data.rw) - 2'(mem_rsp_fire)));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_reads <= '0;
|
||||
perf_mem_writes <= '0;
|
||||
perf_mem_lat <= '0;
|
||||
mem_perf <= '0;
|
||||
end else begin
|
||||
if (mem_req_fire && ~mem_bus_if.req_data.rw) begin
|
||||
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'(1);
|
||||
mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
if (mem_req_fire && mem_bus_if.req_data.rw) begin
|
||||
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'(1);
|
||||
mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
perf_mem_lat <= perf_mem_lat + perf_mem_pending_reads;
|
||||
mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_total_if.mem_reads = perf_mem_reads;
|
||||
assign perf_memsys_total_if.mem_writes = perf_mem_writes;
|
||||
assign perf_memsys_total_if.mem_latency = perf_mem_lat;
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user