Add perf counter for stalls caused by any operand hazard

This commit is contained in:
Hansung Kim
2024-04-15 01:01:26 -07:00
parent 7ae54bd280
commit 87b966a5fa
4 changed files with 18 additions and 0 deletions

View File

@@ -434,6 +434,8 @@ module VX_core import VX_gpu_pkg::*; #(
$itor(pipeline_perf_if.scb_any_fire_cycles) / $itor(cycles) * 100.0); $itor(pipeline_perf_if.scb_any_fire_cycles) / $itor(cycles) * 100.0);
$display("issue scoreboard: stalls total:\t%d across ISSUE_WIDTH=%d", $display("issue scoreboard: stalls total:\t%d across ISSUE_WIDTH=%d",
pipeline_perf_if.scb_stalls, `ISSUE_WIDTH); pipeline_perf_if.scb_stalls, `ISSUE_WIDTH);
$display("issue scoreboard: stalls by operand hazard: total %d across ISSUE_WIDTH=%d",
pipeline_perf_if.scb_any_unit_uses, `ISSUE_WIDTH);
$display("issue scoreboard: stalls by operand hazard: alu %d (%2.2f cycles per issue)", $display("issue scoreboard: stalls by operand hazard: alu %d (%2.2f cycles per issue)",
scrb_alu_per_core, scrb_alu_per_core,
$itor(scrb_alu_per_core) / $itor(pipeline_perf_if.dispatch_fires[`EX_ALU])); $itor(scrb_alu_per_core) / $itor(pipeline_perf_if.dispatch_fires[`EX_ALU]));

View File

@@ -61,6 +61,7 @@ module VX_issue #(
.reset (scoreboard_reset), .reset (scoreboard_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_scb_stalls(perf_issue_if.scb_stalls), .perf_scb_stalls(perf_issue_if.scb_stalls),
.perf_scb_any_unit_uses(perf_issue_if.scb_any_unit_uses),
.perf_scb_fires (perf_issue_if.scb_fires), .perf_scb_fires (perf_issue_if.scb_fires),
.perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles), .perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles),
.perf_units_uses(perf_issue_if.units_uses), .perf_units_uses(perf_issue_if.units_uses),

View File

@@ -21,6 +21,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls, output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
output reg [`PERF_CTR_BITS-1:0] perf_scb_any_unit_uses,
output reg [`PERF_CTR_BITS-1:0] perf_scb_fires, output reg [`PERF_CTR_BITS-1:0] perf_scb_fires,
output reg [`PERF_CTR_BITS-1:0] perf_scb_any_fire_cycles, output reg [`PERF_CTR_BITS-1:0] perf_scb_any_fire_cycles,
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS], output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
@@ -45,6 +46,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle; wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle;
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r; wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
reg [`ISSUE_WIDTH-1:0] perf_issue_any_unit_per_cycle;
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_any_unit_per_cycle, perf_any_unit_per_cycle_r;
wire [`ISSUE_WIDTH-1:0] perf_issue_fires_per_cycle; wire [`ISSUE_WIDTH-1:0] perf_issue_fires_per_cycle;
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_fires_per_cycle, perf_fires_per_cycle_r; wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_fires_per_cycle, perf_fires_per_cycle_r;
@@ -53,6 +56,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] perf_scb_empty; reg [`PERF_CTR_BITS-1:0] perf_scb_empty;
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle); `POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
`POP_COUNT(perf_any_unit_per_cycle, perf_issue_any_unit_per_cycle);
`POP_COUNT(perf_fires_per_cycle, perf_issue_fires_per_cycle); `POP_COUNT(perf_fires_per_cycle, perf_issue_fires_per_cycle);
assign perf_any_fire_per_cycle = |perf_issue_fires_per_cycle; assign perf_any_fire_per_cycle = |perf_issue_fires_per_cycle;
@@ -95,6 +99,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
// ); // );
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle); `BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
`BUFFER(perf_any_unit_per_cycle_r, perf_any_unit_per_cycle);
`BUFFER(perf_fires_per_cycle_r, perf_fires_per_cycle); `BUFFER(perf_fires_per_cycle_r, perf_fires_per_cycle);
`BUFFER(perf_any_fire_per_cycle_r, perf_any_fire_per_cycle); `BUFFER(perf_any_fire_per_cycle_r, perf_any_fire_per_cycle);
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle); `BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
@@ -103,10 +108,12 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
perf_scb_stalls <= '0; perf_scb_stalls <= '0;
perf_scb_any_unit_uses <= '0;
perf_scb_fires <= '0; perf_scb_fires <= '0;
perf_scb_any_fire_cycles <= '0; perf_scb_any_fire_cycles <= '0;
end else begin end else begin
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r); perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
perf_scb_any_unit_uses <= perf_scb_any_unit_uses + `PERF_CTR_BITS'(perf_any_unit_per_cycle_r);
perf_scb_fires <= perf_scb_fires + `PERF_CTR_BITS'(perf_fires_per_cycle_r); perf_scb_fires <= perf_scb_fires + `PERF_CTR_BITS'(perf_fires_per_cycle_r);
perf_scb_any_fire_cycles <= perf_scb_any_fire_cycles + `PERF_CTR_BITS'(perf_any_fire_per_cycle_r); perf_scb_any_fire_cycles <= perf_scb_any_fire_cycles + `PERF_CTR_BITS'(perf_any_fire_per_cycle_r);
end end
@@ -159,27 +166,32 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
always @(*) begin always @(*) begin
perf_issue_units_per_cycle[i] = '0; perf_issue_units_per_cycle[i] = '0;
perf_issue_any_unit_per_cycle[i] = '0;
perf_issue_sfu_per_cycle[i] = '0; perf_issue_sfu_per_cycle[i] = '0;
if (ibuffer_if[i].valid) begin if (ibuffer_if[i].valid) begin
if (inuse_rd) begin if (inuse_rd) begin
perf_issue_any_unit_per_cycle[i] = '1;
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1; perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
end end
end end
if (inuse_rs1) begin if (inuse_rs1) begin
perf_issue_any_unit_per_cycle[i] = '1;
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1; perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
end end
end end
if (inuse_rs2) begin if (inuse_rs2) begin
perf_issue_any_unit_per_cycle[i] = '1;
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1; perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
end end
end end
if (inuse_rs3) begin if (inuse_rs3) begin
perf_issue_any_unit_per_cycle[i] = '1;
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1; perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1; perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;

View File

@@ -19,6 +19,7 @@ interface VX_pipeline_perf_if ();
wire [`PERF_CTR_BITS-1:0] sched_barrier_idles; wire [`PERF_CTR_BITS-1:0] sched_barrier_idles;
wire [`PERF_CTR_BITS-1:0] ibf_stalls; wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls; wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] scb_any_unit_uses;
wire [`PERF_CTR_BITS-1:0] scb_fires; wire [`PERF_CTR_BITS-1:0] scb_fires;
wire [`PERF_CTR_BITS-1:0] scb_any_fire_cycles; wire [`PERF_CTR_BITS-1:0] scb_any_fire_cycles;
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS]; wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
@@ -43,6 +44,7 @@ interface VX_pipeline_perf_if ();
modport issue ( modport issue (
output ibf_stalls, output ibf_stalls,
output scb_stalls, output scb_stalls,
output scb_any_unit_uses,
output scb_fires, output scb_fires,
output scb_any_fire_cycles, output scb_any_fire_cycles,
output units_uses, output units_uses,
@@ -59,6 +61,7 @@ interface VX_pipeline_perf_if ();
input sched_stalls, input sched_stalls,
input ibf_stalls, input ibf_stalls,
input scb_stalls, input scb_stalls,
input scb_any_unit_uses,
input scb_fires, input scb_fires,
input scb_any_fire_cycles, input scb_any_fire_cycles,
input units_uses, input units_uses,