cache bindings and memory perf refactory

This commit is contained in:
Blaise Tine
2023-11-03 08:18:18 -04:00
parent 69f9ae778d
commit c9e6518e05
20 changed files with 746 additions and 1025 deletions

View File

@@ -77,8 +77,20 @@ module VX_core import VX_gpu_pkg::*; #(
) dcache_bus_tmp_if[DCACHE_NUM_REQS]();
`ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_tmp_if();
VX_pipeline_perf_if pipeline_perf_if();
VX_mem_perf_if mem_perf_tmp_if();
cache_perf_t smem_perf;
assign mem_perf_tmp_if.icache = mem_perf_if.icache;
assign mem_perf_tmp_if.dcache = mem_perf_if.dcache;
assign mem_perf_tmp_if.l2cache = mem_perf_if.l2cache;
assign mem_perf_tmp_if.l3cache = mem_perf_if.l3cache;
`ifdef SM_ENABLE
assign mem_perf_tmp_if.smem = smem_perf;
`else
assign mem_perf_tmp_if.smem = '0;
`endif
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
`endif
`RESET_RELAY (dcr_data_reset, reset);
@@ -226,19 +238,28 @@ module VX_core import VX_gpu_pkg::*; #(
.sim_wb_value (sim_wb_value)
);
`ifdef SM_ENABLE
VX_smem_unit #(
.CORE_ID (CORE_ID)
) smem_unit (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.mem_perf_in_if (mem_perf_if),
.mem_perf_out_if (mem_perf_tmp_if),
.cache_perf (smem_perf),
`endif
.dcache_bus_in_if (dcache_bus_tmp_if),
.dcache_bus_out_if (dcache_bus_if)
);
`else
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i], dcache_bus_tmp_if[i]);
end
`endif
`ifdef PERF_ENABLE
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;

View File

@@ -221,63 +221,63 @@ import VX_fpu_pkg::*;
`VX_DCR_MPM_CLASS_MEM: begin
case (read_addr)
// PERF: icache
`VX_CSR_MPM_ICACHE_READS : read_data_ro_r = mem_perf_if.icache_reads[31:0];
`VX_CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.icache_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_ICACHE_MISS_R : read_data_ro_r = mem_perf_if.icache_read_misses[31:0];
`VX_CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_ICACHE_READS : read_data_ro_r = mem_perf_if.icache.reads[31:0];
`VX_CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.icache.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_ICACHE_MISS_R : read_data_ro_r = mem_perf_if.icache.read_misses[31:0];
`VX_CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.icache.read_misses[`PERF_CTR_BITS-1:32]);
// PERF: dcache
`VX_CSR_MPM_DCACHE_READS : read_data_ro_r = mem_perf_if.dcache_reads[31:0];
`VX_CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_WRITES : read_data_ro_r = mem_perf_if.dcache_writes[31:0];
`VX_CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MISS_R : read_data_ro_r = mem_perf_if.dcache_read_misses[31:0];
`VX_CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MISS_W : read_data_ro_r = mem_perf_if.dcache_write_misses[31:0];
`VX_CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = mem_perf_if.dcache_bank_stalls[31:0];
`VX_CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = mem_perf_if.dcache_mshr_stalls[31:0];
`VX_CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_READS : read_data_ro_r = mem_perf_if.dcache.reads[31:0];
`VX_CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.dcache.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_WRITES : read_data_ro_r = mem_perf_if.dcache.writes[31:0];
`VX_CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.dcache.writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MISS_R : read_data_ro_r = mem_perf_if.dcache.read_misses[31:0];
`VX_CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.dcache.read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MISS_W : read_data_ro_r = mem_perf_if.dcache.write_misses[31:0];
`VX_CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.dcache.write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = mem_perf_if.dcache.bank_stalls[31:0];
`VX_CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache.bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = mem_perf_if.dcache.mshr_stalls[31:0];
`VX_CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache.mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: smem
`VX_CSR_MPM_SMEM_READS : read_data_ro_r = mem_perf_if.smem_reads[31:0];
`VX_CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(mem_perf_if.smem_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SMEM_WRITES : read_data_ro_r = mem_perf_if.smem_writes[31:0];
`VX_CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.smem_writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SMEM_BANK_ST : read_data_ro_r = mem_perf_if.smem_bank_stalls[31:0];
`VX_CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SMEM_READS : read_data_ro_r = mem_perf_if.smem.reads[31:0];
`VX_CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(mem_perf_if.smem.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SMEM_WRITES : read_data_ro_r = mem_perf_if.smem.writes[31:0];
`VX_CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.smem.writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SMEM_BANK_ST : read_data_ro_r = mem_perf_if.smem.bank_stalls[31:0];
`VX_CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.smem.bank_stalls[`PERF_CTR_BITS-1:32]);
// PERF: l2cache
`VX_CSR_MPM_L2CACHE_READS : read_data_ro_r = mem_perf_if.l2cache_reads[31:0];
`VX_CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_WRITES : read_data_ro_r = mem_perf_if.l2cache_writes[31:0];
`VX_CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = mem_perf_if.l2cache_read_misses[31:0];
`VX_CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = mem_perf_if.l2cache_write_misses[31:0];
`VX_CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l2cache_bank_stalls[31:0];
`VX_CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l2cache_mshr_stalls[31:0];
`VX_CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_READS : read_data_ro_r = mem_perf_if.l2cache.reads[31:0];
`VX_CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l2cache.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_WRITES : read_data_ro_r = mem_perf_if.l2cache.writes[31:0];
`VX_CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l2cache.writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = mem_perf_if.l2cache.read_misses[31:0];
`VX_CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l2cache.read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = mem_perf_if.l2cache.write_misses[31:0];
`VX_CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l2cache.write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l2cache.bank_stalls[31:0];
`VX_CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache.bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l2cache.mshr_stalls[31:0];
`VX_CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache.mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: l3cache
`VX_CSR_MPM_L3CACHE_READS : read_data_ro_r = mem_perf_if.l3cache_reads[31:0];
`VX_CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_WRITES : read_data_ro_r = mem_perf_if.l3cache_writes[31:0];
`VX_CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = mem_perf_if.l3cache_read_misses[31:0];
`VX_CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = mem_perf_if.l3cache_write_misses[31:0];
`VX_CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l3cache_bank_stalls[31:0];
`VX_CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l3cache_mshr_stalls[31:0];
`VX_CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_READS : read_data_ro_r = mem_perf_if.l3cache.reads[31:0];
`VX_CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l3cache.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_WRITES : read_data_ro_r = mem_perf_if.l3cache.writes[31:0];
`VX_CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l3cache.writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = mem_perf_if.l3cache.read_misses[31:0];
`VX_CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l3cache.read_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = mem_perf_if.l3cache.write_misses[31:0];
`VX_CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l3cache.write_misses[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l3cache.bank_stalls[31:0];
`VX_CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache.bank_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l3cache.mshr_stalls[31:0];
`VX_CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache.mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: memory
`VX_CSR_MPM_MEM_READS : read_data_ro_r = mem_perf_if.mem_reads[31:0];
`VX_CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(mem_perf_if.mem_reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_MEM_WRITES : read_data_ro_r = mem_perf_if.mem_writes[31:0];
`VX_CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.mem_writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_MEM_LAT : read_data_ro_r = mem_perf_if.mem_latency[31:0];
`VX_CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(mem_perf_if.mem_latency[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_MEM_READS : read_data_ro_r = mem_perf_if.mem.reads[31:0];
`VX_CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(mem_perf_if.mem.reads[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_MEM_WRITES : read_data_ro_r = mem_perf_if.mem.writes[31:0];
`VX_CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.mem.writes[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_MEM_LAT : read_data_ro_r = mem_perf_if.mem.latency[31:0];
`VX_CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(mem_perf_if.mem.latency[`PERF_CTR_BITS-1:32]);
default:;
endcase
end
@@ -299,6 +299,8 @@ import VX_fpu_pkg::*;
`ifdef PERF_ENABLE
wire [`PERF_CTR_BITS-1:0] perf_wctl_stalls = sfu_perf_if.wctl_stalls;
`UNUSED_VAR (perf_wctl_stalls);
`UNUSED_VAR (mem_perf_if.icache);
`UNUSED_VAR (mem_perf_if.smem);
`endif
endmodule

View File

@@ -20,8 +20,7 @@ module VX_smem_unit import VX_gpu_pkg::*; #(
input wire reset,
`ifdef PERF_ENABLE
VX_mem_perf_if.slave mem_perf_in_if,
VX_mem_perf_if.master mem_perf_out_if,
output cache_perf_t cache_perf,
`endif
VX_mem_bus_if.slave dcache_bus_in_if [DCACHE_NUM_REQS],
@@ -29,21 +28,78 @@ module VX_smem_unit import VX_gpu_pkg::*; #(
);
`UNUSED_PARAM (CORE_ID)
`ifdef SM_ENABLE
localparam SMEM_ADDR_WIDTH = `SMEM_LOG_SIZE - `CLOG2(DCACHE_WORD_SIZE);
wire [DCACHE_NUM_REQS-1:0] smem_req_valid;
wire [DCACHE_NUM_REQS-1:0] smem_req_rw;
wire [DCACHE_NUM_REQS-1:0][SMEM_ADDR_WIDTH-1:0] smem_req_addr;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] smem_req_byteen;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_req_data;
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_req_tag;
wire [DCACHE_NUM_REQS-1:0] smem_req_ready;
wire [DCACHE_NUM_REQS-1:0] smem_rsp_valid;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_rsp_data;
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_rsp_tag;
wire [DCACHE_NUM_REQS-1:0] smem_rsp_ready;
`RESET_RELAY (smem_reset, reset);
VX_shared_mem #(
.INSTANCE_ID($sformatf("core%0d-smem", CORE_ID)),
.SIZE (1 << `SMEM_LOG_SIZE),
.NUM_REQS (DCACHE_NUM_REQS),
.NUM_BANKS (`SMEM_NUM_BANKS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.ADDR_WIDTH (SMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) shared_mem (
.clk (clk),
.reset (smem_reset),
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
// Core request
.req_valid (smem_req_valid),
.req_rw (smem_req_rw),
.req_byteen (smem_req_byteen),
.req_addr (smem_req_addr),
.req_data (smem_req_data),
.req_tag (smem_req_tag),
.req_ready (smem_req_ready),
// Core response
.rsp_valid (smem_rsp_valid),
.rsp_data (smem_rsp_data),
.rsp_tag (smem_rsp_tag),
.rsp_ready (smem_rsp_ready)
);
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) switch_out_bus_if[2 * DCACHE_NUM_REQS]();
`ifdef PERF_ENABLE
VX_cache_perf_if perf_smem_if();
`endif
`RESET_RELAY (switch_reset, reset);
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
assign smem_req_valid[i] = switch_out_bus_if[i * 2 + 1].req_valid;
assign smem_req_rw[i] = switch_out_bus_if[i * 2 + 1].req_data.rw;
assign smem_req_byteen[i] = switch_out_bus_if[i * 2 + 1].req_data.byteen;
assign smem_req_data[i] = switch_out_bus_if[i * 2 + 1].req_data.data;
assign smem_req_tag[i] = switch_out_bus_if[i * 2 + 1].req_data.tag;
assign switch_out_bus_if[i * 2 + 1].req_ready = smem_req_ready[i];
assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_rsp_valid[i];
assign switch_out_bus_if[i * 2 + 1].rsp_data.data = smem_rsp_data[i];
assign switch_out_bus_if[i * 2 + 1].rsp_data.tag = smem_rsp_tag[i];
assign smem_rsp_ready[i] = switch_out_bus_if[i * 2 + 1].rsp_ready;
assign smem_req_addr[i] = switch_out_bus_if[i * 2 + 1].req_data.addr[SMEM_ADDR_WIDTH-1:0];
VX_smem_switch #(
.NUM_REQS (2),
.DATA_SIZE (DCACHE_WORD_SIZE),
@@ -65,121 +121,4 @@ module VX_smem_unit import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_out_if[i], switch_out_bus_if[i * 2]);
end
wire [DCACHE_NUM_REQS-1:0] smem_req_valid;
wire [DCACHE_NUM_REQS-1:0] smem_req_rw;
wire [DCACHE_NUM_REQS-1:0][SMEM_ADDR_WIDTH-1:0] smem_req_addr;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] smem_req_byteen;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_req_data;
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_req_tag;
wire [DCACHE_NUM_REQS-1:0] smem_req_ready;
wire [DCACHE_NUM_REQS-1:0] smem_rsp_valid;
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_rsp_data;
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_rsp_tag;
wire [DCACHE_NUM_REQS-1:0] smem_rsp_ready;
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
assign smem_req_valid[i] = switch_out_bus_if[i * 2 + 1].req_valid;
assign smem_req_rw[i] = switch_out_bus_if[i * 2 + 1].req_data.rw;
assign smem_req_byteen[i] = switch_out_bus_if[i * 2 + 1].req_data.byteen;
assign smem_req_data[i] = switch_out_bus_if[i * 2 + 1].req_data.data;
assign smem_req_tag[i] = switch_out_bus_if[i * 2 + 1].req_data.tag;
assign switch_out_bus_if[i * 2 + 1].req_ready = smem_req_ready[i];
assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_rsp_valid[i];
assign switch_out_bus_if[i * 2 + 1].rsp_data.data = smem_rsp_data[i];
assign switch_out_bus_if[i * 2 + 1].rsp_data.tag = smem_rsp_tag[i];
assign smem_rsp_ready[i] = switch_out_bus_if[i * 2 + 1].rsp_ready;
assign smem_req_addr[i] = switch_out_bus_if[i * 2 + 1].req_data.addr[SMEM_ADDR_WIDTH-1:0];
end
`RESET_RELAY (smem_reset, reset);
VX_shared_mem #(
.INSTANCE_ID($sformatf("core%0d-smem", CORE_ID)),
.SIZE (1 << `SMEM_LOG_SIZE),
.NUM_REQS (DCACHE_NUM_REQS),
.NUM_BANKS (`SMEM_NUM_BANKS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.ADDR_WIDTH (SMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) shared_mem (
.clk (clk),
.reset (smem_reset),
`ifdef PERF_ENABLE
.cache_perf_if(perf_smem_if),
`endif
// Core request
.req_valid (smem_req_valid),
.req_rw (smem_req_rw),
.req_byteen (smem_req_byteen),
.req_addr (smem_req_addr),
.req_data (smem_req_data),
.req_tag (smem_req_tag),
.req_ready (smem_req_ready),
// Core response
.rsp_valid (smem_rsp_valid),
.rsp_data (smem_rsp_data),
.rsp_tag (smem_rsp_tag),
.rsp_ready (smem_rsp_ready)
);
`else
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_out_if[i], dcache_bus_in_if[i]);
end
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`endif
`ifdef PERF_ENABLE
assign mem_perf_out_if.icache_reads = mem_perf_in_if.icache_reads;
assign mem_perf_out_if.icache_read_misses = mem_perf_in_if.icache_read_misses;
assign mem_perf_out_if.dcache_reads = mem_perf_in_if.dcache_reads;
assign mem_perf_out_if.dcache_writes = mem_perf_in_if.dcache_writes;
assign mem_perf_out_if.dcache_read_misses = mem_perf_in_if.dcache_read_misses;
assign mem_perf_out_if.dcache_write_misses = mem_perf_in_if.dcache_write_misses;
assign mem_perf_out_if.dcache_bank_stalls = mem_perf_in_if.dcache_bank_stalls;
assign mem_perf_out_if.dcache_mshr_stalls = mem_perf_in_if.dcache_mshr_stalls;
assign mem_perf_out_if.l2cache_reads = mem_perf_in_if.l2cache_reads;
assign mem_perf_out_if.l2cache_writes = mem_perf_in_if.l2cache_writes;
assign mem_perf_out_if.l2cache_read_misses = mem_perf_in_if.l2cache_read_misses;
assign mem_perf_out_if.l2cache_write_misses = mem_perf_in_if.l2cache_write_misses;
assign mem_perf_out_if.l2cache_bank_stalls = mem_perf_in_if.l2cache_bank_stalls;
assign mem_perf_out_if.l2cache_mshr_stalls = mem_perf_in_if.l2cache_mshr_stalls;
assign mem_perf_out_if.l3cache_reads = mem_perf_in_if.l3cache_reads;
assign mem_perf_out_if.l3cache_writes = mem_perf_in_if.l3cache_writes;
assign mem_perf_out_if.l3cache_read_misses = mem_perf_in_if.l3cache_read_misses;
assign mem_perf_out_if.l3cache_write_misses = mem_perf_in_if.l3cache_write_misses;
assign mem_perf_out_if.l3cache_bank_stalls = mem_perf_in_if.l3cache_bank_stalls;
assign mem_perf_out_if.l3cache_mshr_stalls = mem_perf_in_if.l3cache_mshr_stalls;
assign mem_perf_out_if.mem_reads = mem_perf_in_if.mem_reads;
assign mem_perf_out_if.mem_writes = mem_perf_in_if.mem_writes;
assign mem_perf_out_if.mem_latency = mem_perf_in_if.mem_latency;
`ifdef SM_ENABLE
assign mem_perf_out_if.smem_reads = perf_smem_if.reads;
assign mem_perf_out_if.smem_writes = perf_smem_if.writes;
assign mem_perf_out_if.smem_bank_stalls = perf_smem_if.bank_stalls;
`else
assign mem_perf_out_if.smem_reads = '0;
assign mem_perf_out_if.smem_writes = '0;
assign mem_perf_out_if.smem_bank_stalls = '0;
`endif
`endif
endmodule

View File

@@ -358,9 +358,6 @@ task trace_ex_op(input int level,
`INST_SFU_CSRRW: begin if (use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
`INST_SFU_CSRRS: begin if (use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
`INST_SFU_CSRRC: begin if (use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
`INST_SFU_TEX: `TRACE(level, ("TEX"));
`INST_SFU_RASTER:`TRACE(level, ("RASTER"));
`INST_SFU_ROP: `TRACE(level, ("ROP"));
default: `TRACE(level, ("?"));
endcase
end