merging perf counters
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
#+define+SCOPE
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
|
||||
@@ -297,6 +297,9 @@ module VX_cluster #(
|
||||
);
|
||||
|
||||
if (`L2_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l2cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_valid_qual;
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_rw_qual;
|
||||
@@ -345,10 +348,14 @@ module VX_cluster #(
|
||||
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l2cache_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_core_dram_req_valid_qual),
|
||||
.core_req_rw (per_core_dram_req_rw_qual),
|
||||
|
||||
@@ -126,10 +126,12 @@
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
// User Floating-Point CSRs
|
||||
`define CSR_FFLAGS 12'h001
|
||||
`define CSR_FRM 12'h002
|
||||
`define CSR_FCSR 12'h003
|
||||
|
||||
// SIMT CSRs
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GTID 12'h022
|
||||
@@ -153,11 +155,73 @@
|
||||
|
||||
`define CSR_MEPC 12'h341
|
||||
|
||||
`define CSR_CYCLE 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTRET 12'hC02
|
||||
`define CSR_INSTRET_H 12'hC82
|
||||
// Machine Counter/Timers
|
||||
`define CSR_MCYCLE 12'hB00
|
||||
`define CSR_MCYCLE_H 12'hB80
|
||||
`define CSR_MINSTRET 12'hB02
|
||||
`define CSR_MINSTRET_H 12'hB82
|
||||
|
||||
// Machine Performance-monitoring counters
// Address map for the performance counters served by the CSR read mux.
// Each counter is read as two 32-bit CSRs: the base address returns bits
// [31:0] of the counter and the matching *_H address (base + 12'h080)
// returns bits [63:32].
|
||||
// PERF: pipeline
// (*_ST = stall counters, one low/high pair per pipeline stage or unit)
|
||||
`define CSR_MPM_ICACHE_ST 12'hB03
|
||||
`define CSR_MPM_ICACHE_ST_H 12'hB83
|
||||
`define CSR_MPM_IBUF_ST 12'hB04
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB84
|
||||
`define CSR_MPM_SCRB_ST 12'hB05
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB85
|
||||
`define CSR_MPM_ALU_ST 12'hB06
|
||||
`define CSR_MPM_ALU_ST_H 12'hB86
|
||||
`define CSR_MPM_LSU_ST 12'hB07
|
||||
`define CSR_MPM_LSU_ST_H 12'hB87
|
||||
`define CSR_MPM_CSR_ST 12'hB08
|
||||
`define CSR_MPM_CSR_ST_H 12'hB88
|
||||
`define CSR_MPM_MUL_ST 12'hB09
|
||||
`define CSR_MPM_MUL_ST_H 12'hB89
|
||||
`define CSR_MPM_FPU_ST 12'hB0A
|
||||
`define CSR_MPM_FPU_ST_H 12'hB8A
|
||||
`define CSR_MPM_GPU_ST 12'hB0B
|
||||
`define CSR_MPM_GPU_ST_H 12'hB8B
|
||||
// PERF: icache
// (no write-miss, write or eviction CSRs are defined for the icache)
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // read misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_DREQ_ST 12'hB0D // dram request stalls
|
||||
`define CSR_MPM_ICACHE_DREQ_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
|
||||
`define CSR_MPM_ICACHE_MSHR_ST 12'hB0F // MSHR stalls
|
||||
`define CSR_MPM_ICACHE_MSHR_ST_H 12'hB8F
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB10 // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB90
|
||||
`define CSR_MPM_ICACHE_READS 12'hB11 // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB91
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB12 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB13 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_DREQ_ST 12'hB14 // dram request stalls
|
||||
`define CSR_MPM_DCACHE_DREQ_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB16 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB96
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB17 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB97
|
||||
`define CSR_MPM_DCACHE_READS 12'hB18 // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB98
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB19 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB99
|
||||
`define CSR_MPM_DCACHE_EVICTS 12'hB1A // total evictions
|
||||
`define CSR_MPM_DCACHE_EVICTS_H 12'hB9A
|
||||
// PERF: memory
|
||||
`define CSR_MPM_DRAM_LAT 12'hB1B // dram latency (total)
|
||||
`define CSR_MPM_DRAM_LAT_H 12'hB9B
|
||||
`define CSR_MPM_DRAM_REQ 12'hB1C // dram requests
|
||||
`define CSR_MPM_DRAM_REQ_H 12'hB9C
|
||||
`define CSR_MPM_DRAM_RSP 12'hB1D // dram responses
|
||||
`define CSR_MPM_DRAM_RSP_H 12'hB9D
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
`define CSR_MARCHID 12'hF12
|
||||
`define CSR_MIMPID 12'hF13
|
||||
@@ -185,6 +249,38 @@
|
||||
`define FPUQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
`ifndef IDREQ_SIZE
|
||||
`define IDREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRSQ_SIZE
|
||||
`define IDRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
@@ -232,38 +328,6 @@
|
||||
`define DSRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
`ifndef IDREQ_SIZE
|
||||
`define IDREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRSQ_SIZE
|
||||
`define IDRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
|
||||
@@ -66,6 +66,10 @@ module VX_core #(
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if();
|
||||
`endif
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
|
||||
@@ -174,6 +178,9 @@ module VX_core #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) pipeline (
|
||||
`SCOPE_BIND_VX_core_pipeline
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
@@ -231,6 +238,9 @@ module VX_core #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) mem_unit (
|
||||
`SCOPE_BIND_VX_core_mem_unit
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -238,7 +248,7 @@ module VX_core #(
|
||||
// Core <-> Dcache
|
||||
.core_dcache_req_if (core_dcache_req_if),
|
||||
.core_dcache_rsp_if (core_dcache_rsp_if),
|
||||
|
||||
|
||||
// Core <-> Icache
|
||||
.core_icache_req_if (core_icache_req_if),
|
||||
.core_icache_rsp_if (core_icache_rsp_if),
|
||||
|
||||
@@ -6,6 +6,11 @@ module VX_csr_data #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
@@ -114,6 +119,67 @@ module VX_csr_data #(
|
||||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_ICACHE_ST : read_data_r = perf_pipeline_if.icache_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_ST_H : read_data_r = perf_pipeline_if.icache_stalls[63:32];
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibuffer_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibuffer_stalls[63:32];
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scoreboard_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scoreboard_stalls[63:32];
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_if.read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_if.read_misses[63:32];
|
||||
`CSR_MPM_ICACHE_DREQ_ST : read_data_r = perf_memsys_if.icache_if.dreq_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_DREQ_ST_H : read_data_r = perf_memsys_if.icache_if.dreq_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_if.crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_if.crsp_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_MSHR_ST : read_data_r = perf_memsys_if.icache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_MSHR_ST_H : read_data_r = perf_memsys_if.icache_if.mshr_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_if.pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_if.pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_if.reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = perf_memsys_if.icache_if.reads[63:32];
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_if.read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_if.read_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_if.write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_if.write_misses[63:32];
|
||||
`CSR_MPM_DCACHE_DREQ_ST : read_data_r = perf_memsys_if.dcache_if.dreq_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_DREQ_ST_H : read_data_r = perf_memsys_if.dcache_if.dreq_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_if.crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_if.crsp_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_if.mshr_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_if.pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_if.pipe_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_if.reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = perf_memsys_if.dcache_if.reads[63:32];
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_if.writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = perf_memsys_if.dcache_if.writes[63:32];
|
||||
`CSR_MPM_DCACHE_EVICTS : read_data_r = perf_memsys_if.dcache_if.evictions[31:0];
|
||||
`CSR_MPM_DCACHE_EVICTS_H : read_data_r = perf_memsys_if.dcache_if.evictions[63:32];
|
||||
// PERF: memory
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
|
||||
`CSR_MPM_DRAM_REQ : read_data_r = perf_memsys_if.dram_requests[31:0];
|
||||
`CSR_MPM_DRAM_REQ_H : read_data_r = perf_memsys_if.dram_requests[63:32];
|
||||
`CSR_MPM_DRAM_RSP : read_data_r = perf_memsys_if.dram_responses[31:0];
|
||||
`CSR_MPM_DRAM_RSP_H : read_data_r = perf_memsys_if.dram_responses[63:32];
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
|
||||
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
|
||||
@@ -128,10 +194,10 @@ module VX_csr_data #(
|
||||
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
|
||||
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
|
||||
`CSR_INSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_INSTRET_H : read_data_r = csr_instret[63:32];
|
||||
`CSR_MCYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_MCYCLE_H : read_data_r = csr_cycle[63:32];
|
||||
`CSR_MINSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_MINSTRET_H: read_data_r = csr_instret[63:32];
|
||||
|
||||
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
|
||||
@@ -6,6 +6,11 @@ module VX_csr_unit #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
@@ -51,6 +56,10 @@ module VX_csr_unit #(
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
|
||||
@@ -240,45 +240,10 @@
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// Core request tag ID bits (LOG2UP of LSUQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Snoop request tag bits
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SREQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
@@ -316,6 +281,41 @@
|
||||
// Core request size
|
||||
`define INUM_REQUESTS 1
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Block size in bytes
|
||||
`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// Core request tag ID bits (LOG2UP of LSUQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
||||
// DRAM request address bits
|
||||
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
|
||||
|
||||
// DRAM byte enable bits
|
||||
`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE
|
||||
|
||||
// DRAM request tag bits
|
||||
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
|
||||
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Snoop request tag bits
|
||||
`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SREQ_SIZE) : `L2SNP_TAG_WIDTH)
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
|
||||
@@ -16,8 +16,13 @@ module VX_execute #(
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
// perf
|
||||
// commit status
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
@@ -72,7 +77,11 @@ module VX_execute #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
|
||||
@@ -8,6 +8,10 @@ module VX_issue #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
|
||||
@@ -120,6 +124,21 @@ module VX_issue #(
|
||||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
|
||||
`ifdef PERF_ENABLE
    // Scoreboard stall counter (compiled in only when PERF_ENABLE is defined).
    // Advances by one on every rising clock edge at which ibuf_deq_if.valid
    // and scoreboard_delay are both asserted; a synchronous reset clears it.
    reg [63:0] perf_scoreboard_stalls;

    always @(posedge clk) begin
        if (reset)
            perf_scoreboard_stalls <= 64'd0;
        else if (ibuf_deq_if.valid & scoreboard_delay)
            perf_scoreboard_stalls <= perf_scoreboard_stalls + 64'd1;
    end

    // Publish the running total on the pipeline perf interface.
    assign perf_pipeline_if.scoreboard_stalls = perf_scoreboard_stalls;
`endif
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
|
||||
@@ -7,6 +7,10 @@ module VX_mem_unit # (
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_cache_core_req_if core_dcache_req_if,
|
||||
@@ -28,6 +32,11 @@ module VX_mem_unit # (
|
||||
VX_cache_core_req_if io_req_if,
|
||||
VX_cache_core_rsp_if io_rsp_if
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
|
||||
`endif
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH (`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
@@ -80,6 +89,82 @@ module VX_mem_unit # (
|
||||
.smem_rsp_if (smem_rsp_if),
|
||||
.io_rsp_if (io_rsp_if),
|
||||
.core_rsp_if (core_dcache_rsp_if)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`IBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_REQS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRSQ_SIZE (`IDRSQ_SIZE),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (core_icache_req_if.valid),
|
||||
.core_req_rw (core_icache_req_if.rw),
|
||||
.core_req_byteen (core_icache_req_if.byteen),
|
||||
.core_req_addr (core_icache_req_if.addr),
|
||||
.core_req_data (core_icache_req_if.data),
|
||||
.core_req_tag (core_icache_req_if.tag),
|
||||
.core_req_ready (core_icache_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (core_icache_rsp_if.valid),
|
||||
.core_rsp_data (core_icache_rsp_if.data),
|
||||
.core_rsp_tag (core_icache_rsp_if.tag),
|
||||
.core_rsp_ready (core_icache_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_icache_if),
|
||||
`endif
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_valid (icache_dram_req_if.valid),
|
||||
.dram_req_rw (icache_dram_req_if.rw),
|
||||
.dram_req_byteen (icache_dram_req_if.byteen),
|
||||
.dram_req_addr (icache_dram_req_if.addr),
|
||||
.dram_req_data (icache_dram_req_if.data),
|
||||
.dram_req_tag (icache_dram_req_if.tag),
|
||||
.dram_req_ready (icache_dram_req_if.ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (icache_dram_rsp_if.valid),
|
||||
.dram_rsp_data (icache_dram_rsp_if.data),
|
||||
.dram_rsp_tag (icache_dram_rsp_if.tag),
|
||||
.dram_rsp_ready (icache_dram_rsp_if.ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (1'b0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
@@ -124,6 +209,10 @@ module VX_mem_unit # (
|
||||
.core_rsp_tag (dcache_rsp_if.tag),
|
||||
.core_rsp_ready (dcache_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_dcache_if),
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dcache_dram_req_if.valid),
|
||||
.dram_req_rw (dcache_dram_req_if.rw),
|
||||
@@ -151,78 +240,6 @@ module VX_mem_unit # (
|
||||
.snp_rsp_tag (dcache_snp_rsp_if.tag),
|
||||
.snp_rsp_ready (dcache_snp_rsp_if.ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.BANK_LINE_SIZE (`IBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_REQS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRSQ_SIZE (`IDRSQ_SIZE),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (1),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (core_icache_req_if.valid),
|
||||
.core_req_rw (core_icache_req_if.rw),
|
||||
.core_req_byteen (core_icache_req_if.byteen),
|
||||
.core_req_addr (core_icache_req_if.addr),
|
||||
.core_req_data (core_icache_req_if.data),
|
||||
.core_req_tag (core_icache_req_if.tag),
|
||||
.core_req_ready (core_icache_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (core_icache_rsp_if.valid),
|
||||
.core_rsp_data (core_icache_rsp_if.data),
|
||||
.core_rsp_tag (core_icache_rsp_if.tag),
|
||||
.core_rsp_ready (core_icache_rsp_if.ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_valid (icache_dram_req_if.valid),
|
||||
.dram_req_rw (icache_dram_req_if.rw),
|
||||
.dram_req_byteen (icache_dram_req_if.byteen),
|
||||
.dram_req_addr (icache_dram_req_if.addr),
|
||||
.dram_req_data (icache_dram_req_if.data),
|
||||
.dram_req_tag (icache_dram_req_if.tag),
|
||||
.dram_req_ready (icache_dram_req_if.ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (icache_dram_rsp_if.valid),
|
||||
.dram_rsp_data (icache_dram_rsp_if.data),
|
||||
.dram_rsp_tag (icache_dram_rsp_if.tag),
|
||||
.dram_rsp_ready (icache_dram_rsp_if.ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (1'b0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
@@ -268,6 +285,10 @@ module VX_mem_unit # (
|
||||
.core_rsp_tag (smem_rsp_if.tag),
|
||||
.core_rsp_ready (smem_rsp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_smem_if),
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
`UNUSED_PIN (dram_req_valid),
|
||||
`UNUSED_PIN (dram_req_rw),
|
||||
@@ -340,4 +361,65 @@ module VX_mem_unit # (
|
||||
.rsp_ready_in (dram_rsp_if.ready)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE

    // ------------------------------------------------------------------
    // Cache counters: forward the icache and dcache perf interfaces onto
    // the aggregated memory-system perf interface, one counter at a time.
    // ------------------------------------------------------------------
    assign perf_memsys_if.icache_if.reads        = perf_icache_if.reads;
    assign perf_memsys_if.dcache_if.reads        = perf_dcache_if.reads;

    assign perf_memsys_if.icache_if.writes       = perf_icache_if.writes;
    assign perf_memsys_if.dcache_if.writes       = perf_dcache_if.writes;

    assign perf_memsys_if.icache_if.read_misses  = perf_icache_if.read_misses;
    assign perf_memsys_if.dcache_if.read_misses  = perf_dcache_if.read_misses;

    assign perf_memsys_if.icache_if.write_misses = perf_icache_if.write_misses;
    assign perf_memsys_if.dcache_if.write_misses = perf_dcache_if.write_misses;

    assign perf_memsys_if.icache_if.evictions    = perf_icache_if.evictions;
    assign perf_memsys_if.dcache_if.evictions    = perf_dcache_if.evictions;

    assign perf_memsys_if.icache_if.mshr_stalls  = perf_icache_if.mshr_stalls;
    assign perf_memsys_if.dcache_if.mshr_stalls  = perf_dcache_if.mshr_stalls;

    assign perf_memsys_if.icache_if.crsp_stalls  = perf_icache_if.crsp_stalls;
    assign perf_memsys_if.dcache_if.crsp_stalls  = perf_dcache_if.crsp_stalls;

    assign perf_memsys_if.icache_if.dreq_stalls  = perf_icache_if.dreq_stalls;
    assign perf_memsys_if.dcache_if.dreq_stalls  = perf_dcache_if.dreq_stalls;

    assign perf_memsys_if.icache_if.pipe_stalls  = perf_icache_if.pipe_stalls;
    assign perf_memsys_if.dcache_if.pipe_stalls  = perf_dcache_if.pipe_stalls;

    // ------------------------------------------------------------------
    // DRAM counters
    //   perf_dram_lat_per_cycle : +1 per accepted request with rw deasserted
    //                             (presumably a read -- confirm polarity),
    //                             -1 per accepted response, unchanged when
    //                             both happen in the same cycle.
    //   perf_dram_req / _rsp    : accepted request / response handshakes.
    //   perf_dram_lat           : running sum of perf_dram_lat_per_cycle,
    //                             accumulated every non-reset cycle.
    // ------------------------------------------------------------------
    reg [63:0] perf_dram_lat_per_cycle;
    reg [63:0] perf_dram_req, perf_dram_rsp, perf_dram_lat;

    always @(posedge clk) begin
        if (reset) begin
            perf_dram_lat_per_cycle <= 64'd0;
            perf_dram_req           <= 64'd0;
            perf_dram_rsp           <= 64'd0;
            perf_dram_lat           <= 64'd0;
        end else begin
            // Tracker update; priority order matters, first match wins.
            if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready & dram_rsp_if.valid & dram_rsp_if.ready) begin
                // request and response in the same cycle: value is held
            end else if (dram_req_if.valid & (~dram_req_if.rw) & dram_req_if.ready) begin
                perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle + 64'd1;
            end else if (dram_rsp_if.valid & dram_rsp_if.ready) begin
                perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle - 64'd1;
            end

            // Handshake totals (all requests, regardless of rw).
            if (dram_req_if.valid & dram_req_if.ready) begin
                perf_dram_req <= perf_dram_req + 64'd1;
            end
            if (dram_rsp_if.valid & dram_rsp_if.ready) begin
                perf_dram_rsp <= perf_dram_rsp + 64'd1;
            end

            // Nonblocking read: uses the tracker value from before this edge.
            perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
        end
    end

    assign perf_memsys_if.dram_requests  = perf_dram_req;
    assign perf_memsys_if.dram_responses = perf_dram_rsp;
    assign perf_memsys_if.dram_latency   = perf_dram_lat;
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -51,6 +51,10 @@ module VX_pipeline #(
|
||||
output wire[31:0] csr_io_rsp_data,
|
||||
input wire csr_io_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Status
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
@@ -171,6 +175,10 @@ module VX_pipeline #(
|
||||
VX_commit_if fpu_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if();
|
||||
`endif
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fetch (
|
||||
@@ -206,6 +214,10 @@ module VX_pipeline #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
@@ -224,7 +236,12 @@ module VX_pipeline #(
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
|
||||
.dcache_req_if (core_dcache_req_if),
|
||||
.dcache_rsp_if (core_dcache_rsp_if),
|
||||
|
||||
@@ -272,4 +289,78 @@ module VX_pipeline #(
|
||||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
    // Pipeline stall counters (compiled in only when PERF_ENABLE is defined).
    // Each 64-bit counter advances by one on every rising clock edge at which
    // the corresponding interface has valid asserted while ready is
    // deasserted. All counters are cleared by the synchronous reset.
    // The scoreboard stall counter is not maintained here; VX_issue drives
    // perf_pipeline_if.scoreboard_stalls.
    reg [63:0] perf_icache_stalls;
    reg [63:0] perf_ibuffer_stalls;
    reg [63:0] perf_alu_stalls;
    reg [63:0] perf_lsu_stalls;
    reg [63:0] perf_csr_stalls;
    reg [63:0] perf_gpu_stalls;
`ifdef EXT_M_ENABLE
    reg [63:0] perf_mul_stalls;
`endif
`ifdef EXT_F_ENABLE
    reg [63:0] perf_fpu_stalls;
`endif

    always @(posedge clk) begin
        if (reset) begin
            perf_icache_stalls  <= 0;
            perf_ibuffer_stalls <= 0;
            perf_alu_stalls     <= 0;
            perf_lsu_stalls     <= 0;
            perf_csr_stalls     <= 0;
            perf_gpu_stalls     <= 0;
        `ifdef EXT_M_ENABLE
            perf_mul_stalls     <= 0;
        `endif
        `ifdef EXT_F_ENABLE
            perf_fpu_stalls     <= 0;
        `endif
        end else begin
            if (core_icache_req_if.valid & !core_icache_req_if.ready) begin
                perf_icache_stalls <= perf_icache_stalls + 64'd1;
            end
            if (decode_if.valid & !decode_if.ready) begin
                perf_ibuffer_stalls <= perf_ibuffer_stalls + 64'd1;
            end
            if (alu_req_if.valid & !alu_req_if.ready) begin
                perf_alu_stalls <= perf_alu_stalls + 64'd1;
            end
            if (lsu_req_if.valid & !lsu_req_if.ready) begin
                perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
            end
            if (csr_req_if.valid & !csr_req_if.ready) begin
                perf_csr_stalls <= perf_csr_stalls + 64'd1;
            end
            if (gpu_req_if.valid & !gpu_req_if.ready) begin
                perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
            end
        `ifdef EXT_M_ENABLE
            if (mul_req_if.valid & !mul_req_if.ready) begin
                perf_mul_stalls <= perf_mul_stalls + 64'd1;
            end
        `endif
        `ifdef EXT_F_ENABLE
            if (fpu_req_if.valid & !fpu_req_if.ready) begin
                perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
            end
        `endif
        end
    end

    assign perf_pipeline_if.icache_stalls  = perf_icache_stalls;
    assign perf_pipeline_if.ibuffer_stalls = perf_ibuffer_stalls;
    assign perf_pipeline_if.alu_stalls     = perf_alu_stalls;
    assign perf_pipeline_if.lsu_stalls     = perf_lsu_stalls;
    assign perf_pipeline_if.csr_stalls     = perf_csr_stalls;
    assign perf_pipeline_if.gpu_stalls     = perf_gpu_stalls;

    // The CSR read mux reads mul_stalls and fpu_stalls whenever PERF_ENABLE
    // is defined, independent of EXT_M_ENABLE / EXT_F_ENABLE. Drive a constant
    // zero when the extension is compiled out so those CSRs never return an
    // undriven value.
`ifdef EXT_M_ENABLE
    assign perf_pipeline_if.mul_stalls     = perf_mul_stalls;
`else
    assign perf_pipeline_if.mul_stalls     = 64'd0;
`endif
`ifdef EXT_F_ENABLE
    assign perf_pipeline_if.fpu_stalls     = perf_fpu_stalls;
`else
    assign perf_pipeline_if.fpu_stalls     = 64'd0;
`endif
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -299,6 +299,9 @@ module Vortex (
|
||||
);
|
||||
|
||||
if (`L3_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l3cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid_qual;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw_qual;
|
||||
@@ -347,10 +350,14 @@ module Vortex (
|
||||
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l3cache_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_cluster_dram_req_valid_qual),
|
||||
.core_req_rw (per_cluster_dram_req_rw_qual),
|
||||
|
||||
21
hw/rtl/cache/VX_bank.v
vendored
21
hw/rtl/cache/VX_bank.v
vendored
@@ -96,6 +96,14 @@ module VX_bank #(
|
||||
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_mshr_stall,
|
||||
output wire perf_pipe_stall,
|
||||
output wire perf_evict,
|
||||
output wire perf_read_miss,
|
||||
output wire perf_write_miss,
|
||||
`endif
|
||||
|
||||
// Misses
|
||||
output wire misses
|
||||
);
|
||||
@@ -567,7 +575,6 @@ end else begin
|
||||
assign incoming_fill_st2 = 0;
|
||||
|
||||
assign misses = 0;
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
@@ -951,6 +958,18 @@ end
|
||||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID));
|
||||
|
||||
// Per-bank performance event outputs. This section is compiled only when
// PERF_ENABLE is defined; every output is a plain combinational assign.
`ifdef PERF_ENABLE
|
||||
// Mirrors the bank's pipeline_stall signal.
assign perf_pipe_stall = pipeline_stall;
|
||||
// Mirrors mshr_going_full.
assign perf_mshr_stall = mshr_going_full;
|
||||
// High when the pipeline is not stalled, miss_st1 is set, is_mshr_st1 is clear
// and mem_rw_st1 is low.
// NOTE(review): presumably a first-time read miss (not an MSHR replay) - confirm.
assign perf_read_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & !mem_rw_st1;
|
||||
// Same qualification as perf_read_miss, but with mem_rw_st1 high.
assign perf_write_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & mem_rw_st1;
|
||||
// Eviction event: only generated when DRAM_ENABLE is set, otherwise tied to 0.
if (DRAM_ENABLE) begin
|
||||
// High when dreq_push and do_writeback_st3 are set and is_snp_st3 is clear.
// NOTE(review): presumably excludes snoop-triggered writebacks - confirm.
assign perf_evict = dreq_push & do_writeback_st3 & !is_snp_st3;
|
||||
end else begin
|
||||
assign perf_evict = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
wire incoming_fill_dfp_st3 = drsq_push && (addr_st3 == dram_rsp_addr);
|
||||
always @(posedge clk) begin
|
||||
|
||||
178
hw/rtl/cache/VX_cache.v
vendored
178
hw/rtl/cache/VX_cache.v
vendored
@@ -70,7 +70,12 @@ module VX_cache #(
|
||||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
|
||||
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
output wire dram_req_valid,
|
||||
output wire dram_req_rw,
|
||||
@@ -130,7 +135,16 @@ module VX_cache #(
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_miss;
|
||||
assign miss_vec = per_bank_miss;
|
||||
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_evict_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
`endif
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign snp_req_ready = per_bank_snp_req_ready;
|
||||
end else begin
|
||||
@@ -139,9 +153,9 @@ module VX_cache #(
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) cache_core_req_bank_sel (
|
||||
.core_req_valid (core_req_valid),
|
||||
@@ -312,6 +326,14 @@ module VX_cache #(
|
||||
.dram_rsp_addr (curr_bank_dram_rsp_addr),
|
||||
.dram_rsp_ready (curr_bank_dram_rsp_ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_mshr_stall (perf_mshr_stall_per_bank[i]),
|
||||
.perf_pipe_stall (perf_pipe_stall_per_bank[i]),
|
||||
.perf_evict (perf_evict_per_bank[i]),
|
||||
.perf_read_miss (perf_read_miss_per_bank[i]),
|
||||
.perf_write_miss (perf_write_miss_per_bank[i]),
|
||||
`endif
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (curr_bank_snp_req_valid),
|
||||
.snp_req_addr (curr_bank_snp_req_addr),
|
||||
@@ -408,4 +430,150 @@ module VX_cache #(
|
||||
`UNUSED_VAR (snp_rsp_ready)
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_req_r, core_req_w
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_req_r_per_cycle, perf_core_req_w_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & ~core_req_rw}}),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & {NUM_REQS{core_req_ready & core_req_rw}}),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & {NUM_REQS{!core_rsp_ready}}),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
end else begin
|
||||
VX_countones #( // core_req_r
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_r_count (
|
||||
.valids (core_req_valid & core_req_ready & ~core_req_rw),
|
||||
.count (perf_core_req_r_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_req_w
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_req_w_count (
|
||||
.valids (core_req_valid & core_req_ready & core_req_rw),
|
||||
.count (perf_core_req_w_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #( // core_rsp
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & ~core_rsp_ready),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
end
|
||||
|
||||
// per cycle: mshr stalls, pipeline stalls, evictions, read misses, write misses
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_evictions_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_mshr_stall_count (
|
||||
.valids (perf_mshr_stall_per_bank),
|
||||
.count (perf_mshr_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_total_stall_count (
|
||||
.valids (perf_pipe_stall_per_bank),
|
||||
.count (perf_pipe_stall_per_cycle)
|
||||
);
|
||||
|
||||
// Reduces the per-bank eviction strobes (perf_evict_per_bank) to a single
// per-cycle count, perf_evictions_per_cycle, via VX_countones.
// NOTE(review): VX_countones is defined outside this view; presumably it
// returns the number of set bits in `valids` - confirm.
// The instance name was garbled ("perf_countones_EVICTSict_count"); it now
// follows the naming of the sibling mshr_stall / read_miss / write_miss
// counter instances.
VX_countones #(
    .N (NUM_BANKS)
) perf_countones_evict_count (
    .valids (perf_evict_per_bank),
    .count  (perf_evictions_per_cycle)
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_read_miss_count (
|
||||
.valids (perf_read_miss_per_bank),
|
||||
.count (perf_read_miss_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_write_miss_count (
|
||||
.valids (perf_write_miss_per_bank),
|
||||
.count (perf_write_miss_per_cycle)
|
||||
);
|
||||
|
||||
reg [63:0] perf_core_req_r;
|
||||
reg [63:0] perf_core_req_w;
|
||||
reg [63:0] perf_mshr_stall;
|
||||
reg [63:0] perf_pipe_stall;
|
||||
reg [63:0] perf_evictions;
|
||||
reg [63:0] perf_read_miss;
|
||||
reg [63:0] perf_write_miss;
|
||||
reg [63:0] perf_crsp_stall;
|
||||
reg [63:0] perf_dreq_stall;
|
||||
|
||||
// Running 64-bit totals for the cache performance counters.
// On a synchronous reset every total is cleared. Otherwise each total is
// advanced by the event count observed in the current cycle; the per-cycle
// counts are widened to 64 bits before the addition.
always @(posedge clk) begin
    if (reset) begin
        perf_core_req_r <= 64'd0;
        perf_core_req_w <= 64'd0;
        perf_mshr_stall <= 64'd0;
        perf_pipe_stall <= 64'd0;
        perf_evictions  <= 64'd0;
        perf_read_miss  <= 64'd0;
        perf_write_miss <= 64'd0;
        perf_crsp_stall <= 64'd0;
        perf_dreq_stall <= 64'd0;
    end else begin
        // accepted core read / write requests
        perf_core_req_r <= perf_core_req_r + 64'(perf_core_req_r_per_cycle);
        perf_core_req_w <= perf_core_req_w + 64'(perf_core_req_w_per_cycle);

        // per-bank events summed across banks: MSHR stalls, pipeline stalls,
        // evictions, read misses, write misses
        perf_mshr_stall <= perf_mshr_stall + 64'(perf_mshr_stall_per_cycle);
        perf_pipe_stall <= perf_pipe_stall + 64'(perf_pipe_stall_per_cycle);
        perf_evictions  <= perf_evictions  + 64'(perf_evictions_per_cycle);
        perf_read_miss  <= perf_read_miss  + 64'(perf_read_miss_per_cycle);
        perf_write_miss <= perf_write_miss + 64'(perf_write_miss_per_cycle);

        // core responses that were valid but not accepted
        perf_crsp_stall <= perf_crsp_stall + 64'(perf_crsp_stall_per_cycle);

        // DRAM request presented but not accepted: one stall per such cycle
        if (dram_req_valid & !dram_req_ready) begin
            perf_dreq_stall <= perf_dreq_stall + 64'd1;
        end
    end
end
|
||||
|
||||
// Publish the accumulated totals on the cache perf interface.

// request totals
assign perf_cache_if.reads        = perf_core_req_r;
assign perf_cache_if.writes       = perf_core_req_w;

// miss and eviction totals
assign perf_cache_if.read_misses  = perf_read_miss;
assign perf_cache_if.write_misses = perf_write_miss;
assign perf_cache_if.evictions    = perf_evictions;

// stall totals
assign perf_cache_if.mshr_stalls  = perf_mshr_stall;
assign perf_cache_if.pipe_stalls  = perf_pipe_stall;
assign perf_cache_if.crsp_stalls  = perf_crsp_stall;
assign perf_cache_if.dreq_stalls  = perf_dreq_stall;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -159,7 +159,7 @@ module VX_fpnew
|
||||
.tag_o ({fpu_tag_out, fpu_has_fflags_out}),
|
||||
.out_valid_o (fpu_valid_out),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end else begin
|
||||
fpnew_top #(
|
||||
@@ -179,14 +179,14 @@ module VX_fpnew
|
||||
.vectorial_op_i (1'b0),
|
||||
.tag_i (1'b0),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
`UNUSED_PIN (in_ready_o),
|
||||
`UNUSED_PIN (in_ready_o),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result[i]),
|
||||
.status_o (fpu_status[i]),
|
||||
`UNUSED_PIN (tag_o),
|
||||
`UNUSED_PIN (out_valid_o),
|
||||
`UNUSED_PIN (tag_o),
|
||||
`UNUSED_PIN (out_valid_o),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
20
hw/rtl/interfaces/VX_perf_cache_if.v
Normal file
20
hw/rtl/interfaces/VX_perf_cache_if.v
Normal file
@@ -0,0 +1,20 @@
|
||||
`ifndef VX_PERF_CACHE_IF
|
||||
`define VX_PERF_CACHE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Bundle of cache performance counters. Every member is a 64-bit value;
// VX_cache drives all of them from its running totals when PERF_ENABLE is
// defined.
interface VX_perf_cache_if ();

    // core request totals
    wire [63:0] reads, writes;

    // miss and eviction totals
    wire [63:0] read_misses, write_misses, evictions;

    // stall totals: MSHR, core response, DRAM request, bank pipeline
    wire [63:0] mshr_stalls, crsp_stalls, dreq_stalls, pipe_stalls;

endinterface
|
||||
|
||||
`endif
|
||||
17
hw/rtl/interfaces/VX_perf_memsys_if.v
Normal file
17
hw/rtl/interfaces/VX_perf_memsys_if.v
Normal file
@@ -0,0 +1,17 @@
|
||||
`ifndef VX_PERF_MEMSYS_IF
|
||||
`define VX_PERF_MEMSYS_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Memory-system performance counters: one nested VX_perf_cache_if bundle for
// the data cache and one for the instruction cache, plus three 64-bit DRAM
// statistics.
// NOTE(review): the drivers of the DRAM counters are outside this view.
interface VX_perf_memsys_if ();

    // Nested interface instances. An interface instantiation needs an explicit
    // (here empty) port list; without the parentheses the line is parsed as a
    // declaration of an unknown data type. This matches the other instances in
    // the design, e.g. `VX_perf_cache_if perf_l2cache_if();`.
    VX_perf_cache_if dcache_if();
    VX_perf_cache_if icache_if();

    // DRAM statistics
    wire [63:0] dram_latency;
    wire [63:0] dram_requests;
    wire [63:0] dram_responses;

endinterface
|
||||
|
||||
`endif
|
||||
25
hw/rtl/interfaces/VX_perf_pipeline_if.v
Normal file
25
hw/rtl/interfaces/VX_perf_pipeline_if.v
Normal file
@@ -0,0 +1,25 @@
|
||||
`ifndef VX_PERF_PIPELINE_IF
|
||||
`define VX_PERF_PIPELINE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Bundle of core-pipeline stall counters, all 64 bits wide. The mul and fpu
// members exist only when the corresponding extension macro is defined.
interface VX_perf_pipeline_if ();

    // from pipeline (front-end)
    wire [63:0] icache_stalls, ibuffer_stalls;

    // from issue
    // NOTE(review): the driver of scoreboard_stalls is not in view - confirm.
    wire [63:0] scoreboard_stalls;

    // from execute
    wire [63:0] lsu_stalls, csr_stalls, alu_stalls, gpu_stalls;
`ifdef EXT_M_ENABLE
    wire [63:0] mul_stalls;
`endif
`ifdef EXT_F_ENABLE
    wire [63:0] fpu_stalls;
`endif

endinterface
|
||||
|
||||
`endif
|
||||
Reference in New Issue
Block a user