Rename sched_barrier_stalls -> sched_barrier_idles
A scheduler stall caused by a barrier is really an idle cycle, because it causes !schedule_if.valid, which is counted as part of sched_idles.
This commit is contained in:
@@ -415,13 +415,13 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
$itor(instrs) / $itor(cycles));
|
$itor(instrs) / $itor(cycles));
|
||||||
$display("scheduler idle: %d cycles (%.2f%%)", pipeline_perf_if.sched_idles,
|
$display("scheduler idle: %d cycles (%.2f%%)", pipeline_perf_if.sched_idles,
|
||||||
$itor(pipeline_perf_if.sched_idles) / $itor(cycles) * 100.0);
|
$itor(pipeline_perf_if.sched_idles) / $itor(cycles) * 100.0);
|
||||||
|
$display("scheduler barrier idle: %d count across NUM_WARPS=%d",
|
||||||
|
pipeline_perf_if.sched_barrier_idles, `NUM_WARPS);
|
||||||
|
// sched_stalls can happen when the later issue stage stalls,
|
||||||
|
// causing the ibuffer to clog.
|
||||||
$display("scheduler stalls: %d cycles (%.2f%%)", pipeline_perf_if.sched_stalls,
|
$display("scheduler stalls: %d cycles (%.2f%%)", pipeline_perf_if.sched_stalls,
|
||||||
$itor(pipeline_perf_if.sched_stalls) / $itor(cycles) * 100.0);
|
$itor(pipeline_perf_if.sched_stalls) / $itor(cycles) * 100.0);
|
||||||
$display("scheduler barrier stalls: %d count across NUM_WARPS=%d (%.2f%%)",
|
$display("decode stalls (ibuffer not ready): %d cycles (%.2f%%)",pipeline_perf_if.ibf_stalls,
|
||||||
pipeline_perf_if.sched_barrier_stalls,
|
|
||||||
`NUM_WARPS,
|
|
||||||
$itor(pipeline_perf_if.sched_barrier_stalls) / $itor(cycles) * 100.0);
|
|
||||||
$display("decode stalls: %d cycles (%.2f%%)",pipeline_perf_if.ibf_stalls,
|
|
||||||
$itor(pipeline_perf_if.ibf_stalls) / $itor(cycles) * 100.0);
|
$itor(pipeline_perf_if.ibf_stalls) / $itor(cycles) * 100.0);
|
||||||
// see VX_scoreboard.sv
|
// see VX_scoreboard.sv
|
||||||
// scb_stalls: valid & ~ready (ready = stg_ready_in && operands_ready)
|
// scb_stalls: valid & ~ready (ready = stg_ready_in && operands_ready)
|
||||||
@@ -472,12 +472,12 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
pipeline_perf_if.dispatch_any_fire_cycles,
|
pipeline_perf_if.dispatch_any_fire_cycles,
|
||||||
$itor(pipeline_perf_if.dispatch_any_fire_cycles) / $itor(cycles) * 100.0);
|
$itor(pipeline_perf_if.dispatch_any_fire_cycles) / $itor(cycles) * 100.0);
|
||||||
$display("ifetches: %d", perf_ifetches);
|
$display("ifetches: %d", perf_ifetches);
|
||||||
$display("ifetch latency: %f Cycles",
|
$display("ifetch latency: %f cycles",
|
||||||
$itor(icache_lat) / $itor(ifetches));
|
$itor(icache_lat) / $itor(ifetches));
|
||||||
$display("loads: %d", perf_loads);
|
$display("dcache loads: %d", perf_loads);
|
||||||
$display("load latency: %f Cycles",
|
$display("dcache load latency: %f cycles",
|
||||||
$itor(dcache_lat) / $itor(loads));
|
$itor(dcache_lat) / $itor(loads));
|
||||||
$display("stores: %d", perf_stores);
|
$display("dcache stores: %d", perf_stores);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -413,28 +413,28 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||||||
`ifdef PERF_ENABLE
|
`ifdef PERF_ENABLE
|
||||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||||
reg [`PERF_CTR_BITS-1:0] perf_sched_stalls;
|
reg [`PERF_CTR_BITS-1:0] perf_sched_stalls;
|
||||||
reg [`PERF_CTR_BITS-1:0] perf_sched_barrier_stalls;
|
reg [`PERF_CTR_BITS-1:0] perf_sched_barrier_idles;
|
||||||
|
|
||||||
wire schedule_idle = ~schedule_valid;
|
wire schedule_idle = ~schedule_valid;
|
||||||
wire schedule_stall = schedule_if.valid && ~schedule_if.ready;
|
wire schedule_stall = schedule_if.valid && ~schedule_if.ready;
|
||||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] schedule_barrier_stall;
|
wire [`CLOG2(`NUM_WARPS+1)-1:0] schedule_barrier_idle;
|
||||||
`POP_COUNT(schedule_barrier_stall, barrier_stalls);
|
`POP_COUNT(schedule_barrier_idle, barrier_stalls);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
perf_sched_idles <= '0;
|
perf_sched_idles <= '0;
|
||||||
|
perf_sched_barrier_idles <= '0;
|
||||||
perf_sched_stalls <= '0;
|
perf_sched_stalls <= '0;
|
||||||
perf_sched_barrier_stalls <= '0;
|
|
||||||
end else begin
|
end else begin
|
||||||
perf_sched_idles <= perf_sched_idles + `PERF_CTR_BITS'(schedule_idle);
|
perf_sched_idles <= perf_sched_idles + `PERF_CTR_BITS'(schedule_idle);
|
||||||
|
perf_sched_barrier_idles <= perf_sched_barrier_idles + `PERF_CTR_BITS'(schedule_barrier_idle);
|
||||||
perf_sched_stalls <= perf_sched_stalls + `PERF_CTR_BITS'(schedule_stall);
|
perf_sched_stalls <= perf_sched_stalls + `PERF_CTR_BITS'(schedule_stall);
|
||||||
perf_sched_barrier_stalls <= perf_sched_barrier_stalls + `PERF_CTR_BITS'(schedule_barrier_stall);
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||||
|
assign perf_schedule_if.sched_barrier_idles = perf_sched_barrier_idles;
|
||||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||||
assign perf_schedule_if.sched_barrier_stalls = perf_sched_barrier_stalls;
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -16,9 +16,10 @@
|
|||||||
interface VX_pipeline_perf_if ();
|
interface VX_pipeline_perf_if ();
|
||||||
wire [`PERF_CTR_BITS-1:0] sched_idles;
|
wire [`PERF_CTR_BITS-1:0] sched_idles;
|
||||||
wire [`PERF_CTR_BITS-1:0] sched_stalls;
|
wire [`PERF_CTR_BITS-1:0] sched_stalls;
|
||||||
wire [`PERF_CTR_BITS-1:0] sched_barrier_stalls;
|
wire [`PERF_CTR_BITS-1:0] sched_barrier_idles;
|
||||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||||
|
wire [`PERF_CTR_BITS-1:0] scb_fires;
|
||||||
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
||||||
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
|
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
|
||||||
wire [`PERF_CTR_BITS-1:0] dispatch_stalls [`NUM_EX_UNITS];
|
wire [`PERF_CTR_BITS-1:0] dispatch_stalls [`NUM_EX_UNITS];
|
||||||
@@ -34,13 +35,14 @@ interface VX_pipeline_perf_if ();
|
|||||||
|
|
||||||
modport schedule (
|
modport schedule (
|
||||||
output sched_idles,
|
output sched_idles,
|
||||||
output sched_barrier_stalls,
|
output sched_barrier_idles,
|
||||||
output sched_stalls
|
output sched_stalls
|
||||||
);
|
);
|
||||||
|
|
||||||
modport issue (
|
modport issue (
|
||||||
output ibf_stalls,
|
output ibf_stalls,
|
||||||
output scb_stalls,
|
output scb_stalls,
|
||||||
|
output scb_fires,
|
||||||
output units_uses,
|
output units_uses,
|
||||||
output sfu_uses,
|
output sfu_uses,
|
||||||
output dispatch_stalls,
|
output dispatch_stalls,
|
||||||
@@ -51,10 +53,11 @@ interface VX_pipeline_perf_if ();
|
|||||||
|
|
||||||
modport slave (
|
modport slave (
|
||||||
input sched_idles,
|
input sched_idles,
|
||||||
input sched_barrier_stalls,
|
input sched_barrier_idles,
|
||||||
input sched_stalls,
|
input sched_stalls,
|
||||||
input ibf_stalls,
|
input ibf_stalls,
|
||||||
input scb_stalls,
|
input scb_stalls,
|
||||||
|
input scb_fires,
|
||||||
input units_uses,
|
input units_uses,
|
||||||
input sfu_uses,
|
input sfu_uses,
|
||||||
input dispatch_stalls,
|
input dispatch_stalls,
|
||||||
|
|||||||
Reference in New Issue
Block a user