From 87b966a5fab1a78a729a1fc61edff0699dc73be6 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 15 Apr 2024 01:01:26 -0700 Subject: [PATCH] Add perf counter for stall by any operand hazard --- hw/rtl/core/VX_core.sv | 2 ++ hw/rtl/core/VX_issue.sv | 1 + hw/rtl/core/VX_scoreboard.sv | 12 ++++++++++++ hw/rtl/interfaces/VX_pipeline_perf_if.sv | 3 +++ 4 files changed, 18 insertions(+) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 0c9a7ac1..5c898a93 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -434,6 +434,8 @@ module VX_core import VX_gpu_pkg::*; #( $itor(pipeline_perf_if.scb_any_fire_cycles) / $itor(cycles) * 100.0); $display("issue scoreboard: stalls total:\t%d across ISSUE_WIDTH=%d", pipeline_perf_if.scb_stalls, `ISSUE_WIDTH); + $display("issue scoreboard: stalls by operand hazard: total %d across ISSUE_WIDTH=%d", + pipeline_perf_if.scb_any_unit_uses, `ISSUE_WIDTH); $display("issue scoreboard: stalls by operand hazard: alu %d (%2.2f cycles per issue)", scrb_alu_per_core, $itor(scrb_alu_per_core) / $itor(pipeline_perf_if.dispatch_fires[`EX_ALU])); diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 4e79ce70..d7853751 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -61,6 +61,7 @@ module VX_issue #( .reset (scoreboard_reset), `ifdef PERF_ENABLE .perf_scb_stalls(perf_issue_if.scb_stalls), + .perf_scb_any_unit_uses(perf_issue_if.scb_any_unit_uses), .perf_scb_fires (perf_issue_if.scb_fires), .perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles), .perf_units_uses(perf_issue_if.units_uses), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index fe038fb5..c63a5dcb 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -21,6 +21,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls, + output reg [`PERF_CTR_BITS-1:0] perf_scb_any_unit_uses, output reg [`PERF_CTR_BITS-1:0] perf_scb_fires, output reg [`PERF_CTR_BITS-1:0] perf_scb_any_fire_cycles, output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS], @@ -45,6 +46,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle; wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r; + reg [`ISSUE_WIDTH-1:0] perf_issue_any_unit_per_cycle; + wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_any_unit_per_cycle, perf_any_unit_per_cycle_r; wire [`ISSUE_WIDTH-1:0] perf_issue_fires_per_cycle; wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_fires_per_cycle, perf_fires_per_cycle_r; @@ -53,6 +56,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( reg [`PERF_CTR_BITS-1:0] perf_scb_empty; `POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle); + `POP_COUNT(perf_any_unit_per_cycle, perf_issue_any_unit_per_cycle); `POP_COUNT(perf_fires_per_cycle, perf_issue_fires_per_cycle); assign perf_any_fire_per_cycle = |perf_issue_fires_per_cycle; @@ -95,6 +99,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( // ); `BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle); + `BUFFER(perf_any_unit_per_cycle_r, perf_any_unit_per_cycle); `BUFFER(perf_fires_per_cycle_r, perf_fires_per_cycle); `BUFFER(perf_any_fire_per_cycle_r, perf_any_fire_per_cycle); `BUFFER(perf_units_per_cycle_r, perf_units_per_cycle); @@ -103,10 +108,12 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(posedge clk) begin if (reset) begin perf_scb_stalls <= '0; + perf_scb_any_unit_uses <= '0; perf_scb_fires <= '0; perf_scb_any_fire_cycles <= '0; end else begin perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r); + perf_scb_any_unit_uses <= perf_scb_any_unit_uses + `PERF_CTR_BITS'(perf_any_unit_per_cycle_r); perf_scb_fires <= perf_scb_fires + `PERF_CTR_BITS'(perf_fires_per_cycle_r); perf_scb_any_fire_cycles <= perf_scb_any_fire_cycles + `PERF_CTR_BITS'(perf_any_fire_per_cycle_r); end @@ -159,27 +166,32 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(*) begin perf_issue_units_per_cycle[i] = '0; + perf_issue_any_unit_per_cycle[i] = '0; perf_issue_sfu_per_cycle[i] = '0; if (ibuffer_if[i].valid) begin if (inuse_rd) begin + perf_issue_any_unit_per_cycle[i] = '1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1; if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1; end end if (inuse_rs1) begin + perf_issue_any_unit_per_cycle[i] = '1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1; if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1; end end if (inuse_rs2) begin + perf_issue_any_unit_per_cycle[i] = '1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1; if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1; end end if (inuse_rs3) begin + perf_issue_any_unit_per_cycle[i] = '1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1; if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1; diff --git a/hw/rtl/interfaces/VX_pipeline_perf_if.sv b/hw/rtl/interfaces/VX_pipeline_perf_if.sv index 874778b8..fc57cad9 100644 --- a/hw/rtl/interfaces/VX_pipeline_perf_if.sv +++ b/hw/rtl/interfaces/VX_pipeline_perf_if.sv @@ -19,6 +19,7 @@ interface VX_pipeline_perf_if (); wire [`PERF_CTR_BITS-1:0] sched_barrier_idles; wire [`PERF_CTR_BITS-1:0] ibf_stalls; wire [`PERF_CTR_BITS-1:0] scb_stalls; + wire [`PERF_CTR_BITS-1:0] scb_any_unit_uses; wire [`PERF_CTR_BITS-1:0] scb_fires; wire [`PERF_CTR_BITS-1:0] scb_any_fire_cycles; wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS]; @@ -43,6 +44,7 @@ interface VX_pipeline_perf_if (); modport issue ( output ibf_stalls, output scb_stalls, + output scb_any_unit_uses, output scb_fires, output scb_any_fire_cycles, output units_uses, @@ -59,6 +61,7 @@ interface VX_pipeline_perf_if (); input sched_stalls, input ibf_stalls, input scb_stalls, + input scb_any_unit_uses, input scb_fires, input scb_any_fire_cycles, input units_uses,