Add perf counter for barrier schedule stalls
This commit is contained in:
@@ -166,6 +166,11 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
||||
// don't check req_id == rsp_id, otherwise it limits us to
|
||||
// 1 outstanding request. instead assume that any response coming
|
||||
// back contains a valid id
|
||||
//
|
||||
// NOTE(hansung): Because every response is broadcast to all cores,
|
||||
// this doesn't work when cores in the cluster use different sets of
|
||||
// IDs. Need a way to keep track of in-use barriers for each core and
|
||||
// validate responses accordingly.
|
||||
if (gbar_bus_if.rsp_valid) begin
|
||||
barrier_masks_n[gbar_bus_if.rsp_id] = '0;
|
||||
// instead of unlocking all warps, only unlock those that
|
||||
@@ -408,22 +413,28 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
||||
`ifdef PERF_ENABLE
// Scheduler performance counters; this whole section is compiled only when
// PERF_ENABLE is defined. Three accumulating registers are maintained and
// exported through perf_schedule_if: idle cycles, stall cycles, and barrier
// stalls. All three are cleared when reset is sampled high on a clock edge.
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_barrier_stalls;
|
||||
|
||||
// Idle: schedule_valid is low this cycle.
wire schedule_idle = ~schedule_valid;
|
||||
// Stall: schedule_if.valid is asserted but schedule_if.ready is not.
wire schedule_stall = schedule_if.valid && ~schedule_if.ready;
|
||||
// Multi-bit count (not a single flag) produced by POP_COUNT over
// barrier_stalls. NOTE(review): barrier_stalls is declared outside this
// view; presumably one bit per warp, which would match the
// CLOG2(NUM_WARPS+1) width (enough for 0..NUM_WARPS) -- confirm.
wire [`CLOG2(`NUM_WARPS+1)-1:0] schedule_barrier_stall;
|
||||
`POP_COUNT(schedule_barrier_stall, barrier_stalls);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_sched_idles <= '0;
|
||||
perf_sched_stalls <= '0;
|
||||
perf_sched_barrier_stalls <= '0;
|
||||
end else begin
|
||||
// Each single-bit event is widened to the counter width before adding,
// so these two counters advance by at most one per clock.
perf_sched_idles <= perf_sched_idles + `PERF_CTR_BITS'(schedule_idle);
|
||||
perf_sched_stalls <= perf_sched_stalls + `PERF_CTR_BITS'(schedule_stall);
|
||||
// Adds the whole popcount, so this counter can advance by more than one
// per clock: it accumulates the per-cycle count, not the number of
// cycles in which any barrier stall occurred.
perf_sched_barrier_stalls <= perf_sched_barrier_stalls + `PERF_CTR_BITS'(schedule_barrier_stall);
|
||||
end
|
||||
end
|
||||
|
||||
// Expose the accumulated values on the perf interface.
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||
assign perf_schedule_if.sched_barrier_stalls = perf_sched_barrier_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
Reference in New Issue
Block a user