fixed instr/cycle perf counter

This commit is contained in:
Blaise Tine
2020-11-12 11:41:25 -08:00
parent fceb561cbd
commit 2e0f51af80
8 changed files with 81 additions and 80 deletions

View File

@@ -91,17 +91,10 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
return err; return err;
} }
extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) { extern int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles) {
int ret = 0; int ret = 0;
unsigned value; unsigned value;
if (cycles) {
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
*cycles = value;
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
*cycles = (*cycles << 32) | value;
}
if (instrs) { if (instrs) {
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value); ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
@@ -110,5 +103,12 @@ extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t*
*instrs = (*instrs << 32) | value; *instrs = (*instrs << 32) | value;
} }
if (cycles) {
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
*cycles = value;
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
*cycles = (*cycles << 32) | value;
}
return ret; return ret;
} }

View File

@@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
int vx_upload_kernel_file(vx_device_h device, const char* filename); int vx_upload_kernel_file(vx_device_h device, const char* filename);
// get performance counters // get performance counters
int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs); int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -18,35 +18,38 @@ module VX_commit #(
VX_writeback_if writeback_if, VX_writeback_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if VX_cmt_to_csr_if cmt_to_csr_if
); );
localparam NCMTW = $clog2(`NUM_EXS*`NUM_THREADS+1); localparam CMTW = $clog2(`NUM_THREADS+1);
// CSRs update // CSRs update
wire [`NUM_EXS-1-1:0] exu_committed; wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
wire [`NUM_THREADS-1:0] lsu_committed; wire lsu_commit_fire = lsu_commit_if.valid && lsu_commit_if.ready;
wire [$clog2(`NUM_EXS-1+1)-1:0] exu_commits; wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
wire [$clog2(`NUM_THREADS+1)-1:0] lsu_commits; wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready;
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
assign exu_committed = {alu_commit_if.valid, wire commit_fire = alu_commit_fire
csr_commit_if.valid, || lsu_commit_fire
mul_commit_if.valid, || csr_commit_fire
fpu_commit_if.valid, || mul_commit_fire
gpu_commit_if.valid}; || fpu_commit_fire
|| gpu_commit_fire;
assign lsu_committed = {`NUM_THREADS{lsu_commit_if.valid}} & lsu_commit_if.tmask; wire [`NUM_THREADS-1:0] commit_tmask = alu_commit_fire ? alu_commit_if.tmask:
lsu_commit_fire ? lsu_commit_if.tmask:
csr_commit_fire ? csr_commit_if.tmask:
mul_commit_fire ? mul_commit_if.tmask:
fpu_commit_fire ? fpu_commit_if.tmask:
gpu_commit_if.tmask;
VX_countones #( wire [CMTW-1:0] commit_size;
.N(`NUM_EXS-1)
) exu_counter (
.valids(exu_committed),
.count (exu_commits)
);
VX_countones #( VX_countones #(
.N(`NUM_THREADS) .N(`NUM_THREADS)
) lsu_counter ( ) commit_ctr (
.valids(lsu_committed), .valids(commit_tmask),
.count (lsu_commits) .count (commit_size)
); );
fflags_t fflags; fflags_t fflags;
@@ -63,25 +66,23 @@ module VX_commit #(
end end
end end
fflags_t fflags_r;
reg has_fflags_r;
reg [`NW_BITS-1:0] wid_r;
reg [$clog2(`NUM_EXS-1+1)-1:0] exu_cmt_r;
reg [$clog2(`NUM_THREADS+1)-1:0] lsu_cmt_r;
reg csr_update_r; reg csr_update_r;
reg [`NW_BITS-1:0] wid_r;
reg [CMTW-1:0] commit_size_r;
reg has_fflags_r;
fflags_t fflags_r;
always @(posedge clk) begin always @(posedge clk) begin
csr_update_r <= (| exu_committed) | lsu_commit_if.valid; csr_update_r <= commit_fire;
fflags_r <= fflags; wid_r <= fpu_commit_if.wid;
has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags; commit_size_r <= commit_size;
wid_r <= fpu_commit_if.wid; has_fflags_r <= fpu_commit_if.has_fflags;
exu_cmt_r <= exu_commits; fflags_r <= fflags;
lsu_cmt_r <= lsu_commits;
end end
assign cmt_to_csr_if.valid = csr_update_r; assign cmt_to_csr_if.valid = csr_update_r;
assign cmt_to_csr_if.wid = wid_r; assign cmt_to_csr_if.wid = wid_r;
assign cmt_to_csr_if.num_commits = {exu_cmt_r, `NT_BITS'(0)} + NCMTW'(lsu_cmt_r); assign cmt_to_csr_if.commit_size = commit_size_r;
assign cmt_to_csr_if.has_fflags = has_fflags_r; assign cmt_to_csr_if.has_fflags = has_fflags_r;
assign cmt_to_csr_if.fflags = fflags_r; assign cmt_to_csr_if.fflags = fflags_r;

View File

@@ -40,7 +40,7 @@ module VX_csr_data #(
reg [31:0] read_data_r; reg [31:0] read_data_r;
always @(posedge clk) begin always @(posedge clk) begin
if (cmt_to_csr_if.has_fflags) begin if (cmt_to_csr_if.valid && cmt_to_csr_if.has_fflags) begin
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags; csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags;
csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags; csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
end end
@@ -92,7 +92,7 @@ module VX_csr_data #(
csr_cycle <= csr_cycle + 1; csr_cycle <= csr_cycle + 1;
end end
if (cmt_to_csr_if.valid) begin if (cmt_to_csr_if.valid) begin
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits); csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size);
end end
end end
end end
@@ -100,42 +100,42 @@ module VX_csr_data #(
always @(*) begin always @(*) begin
read_data_r = 'x; read_data_r = 'x;
case (read_addr) case (read_addr)
`CSR_FFLAGS : read_data_r = 32'(csr_fflags[read_wid]); `CSR_FFLAGS : read_data_r = 32'(csr_fflags[read_wid]);
`CSR_FRM : read_data_r = 32'(csr_frm[read_wid]); `CSR_FRM : read_data_r = 32'(csr_frm[read_wid]);
`CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]); `CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]);
`CSR_LWID : read_data_r = 32'(read_wid); `CSR_LWID : read_data_r = 32'(read_wid);
`CSR_LTID , `CSR_LTID ,
`CSR_GTID , `CSR_GTID ,
`CSR_MHARTID , `CSR_MHARTID ,
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid); `CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
`CSR_GCID : read_data_r = CORE_ID; `CSR_GCID : read_data_r = CORE_ID;
`CSR_NT : read_data_r = `NUM_THREADS; `CSR_NT : read_data_r = `NUM_THREADS;
`CSR_NW : read_data_r = `NUM_WARPS; `CSR_NW : read_data_r = `NUM_WARPS;
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS; `CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
`CSR_SATP : read_data_r = 32'(csr_satp); `CSR_SATP : read_data_r = 32'(csr_satp);
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus); `CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
`CSR_MISA : read_data_r = `ISA_CODE; `CSR_MISA : read_data_r = `ISA_CODE;
`CSR_MEDELEG : read_data_r = 32'(csr_medeleg); `CSR_MEDELEG : read_data_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_r = 32'(csr_mideleg); `CSR_MIDELEG : read_data_r = 32'(csr_mideleg);
`CSR_MIE : read_data_r = 32'(csr_mie); `CSR_MIE : read_data_r = 32'(csr_mie);
`CSR_MTVEC : read_data_r = 32'(csr_mtvec); `CSR_MTVEC : read_data_r = 32'(csr_mtvec);
`CSR_MEPC : read_data_r = 32'(csr_mepc); `CSR_MEPC : read_data_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]); `CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0: read_data_r = 32'(csr_pmpaddr[0]); `CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data_r = csr_cycle[31:0]; `CSR_CYCLE : read_data_r = csr_cycle[31:0];
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32]; `CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
`CSR_INSTRET : read_data_r = csr_instret[31:0]; `CSR_INSTRET : read_data_r = csr_instret[31:0];
`CSR_INSTRET_H:read_data_r = csr_instret[63:32]; `CSR_INSTRET_H : read_data_r = csr_instret[63:32];
`CSR_MVENDORID:read_data_r = `VENDOR_ID; `CSR_MVENDORID : read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID; `CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID; `CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin default: begin
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr); assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
@@ -144,6 +144,6 @@ module VX_csr_data #(
end end
assign read_data = read_data_r; assign read_data = read_data_r;
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid];
endmodule endmodule

View File

@@ -72,8 +72,8 @@ module VX_writeback #(
fpu_valid ? fpu_commit_if.data : fpu_valid ? fpu_commit_if.data :
0; 0;
always @(*) assert(writeback_if.ready);
wire stall =~writeback_if.ready && writeback_if.valid; wire stall =~writeback_if.ready && writeback_if.valid;
always @(*) assert(writeback_if.ready); // the writeback currently has no backpressure from issue stage
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))

View File

@@ -97,7 +97,7 @@ module VX_bank #(
input wire snp_rsp_ready, input wire snp_rsp_ready,
// Misses // Misses
output wire misses output wire misses
); );
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CORE_REQ_INFO

View File

@@ -392,12 +392,12 @@ module VX_cache #(
.snp_req_ready (curr_bank_snp_req_ready), .snp_req_ready (curr_bank_snp_req_ready),
// Snoop response // Snoop response
.snp_rsp_valid (curr_bank_snp_rsp_valid), .snp_rsp_valid (curr_bank_snp_rsp_valid),
.snp_rsp_tag (curr_bank_snp_rsp_tag), .snp_rsp_tag (curr_bank_snp_rsp_tag),
.snp_rsp_ready (curr_bank_snp_rsp_ready), .snp_rsp_ready (curr_bank_snp_rsp_ready),
//Misses //Misses
.misses (curr_bank_miss) .misses (curr_bank_miss)
); );
end end

View File

@@ -9,7 +9,7 @@ interface VX_cmt_to_csr_if ();
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [$clog2(`NUM_EXS*`NUM_THREADS+1)-1:0] num_commits; wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
wire has_fflags; wire has_fflags;
fflags_t fflags; fflags_t fflags;