Add issue-stall-by-unit-busy perf counters
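Sum the per-issue-slot counters for each execution unit instead of combining them with an "OR" reduce, which undercounted whenever more than one issue slot hit the same unit in a cycle.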
This commit is contained in:
@@ -176,36 +176,72 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
`ifdef PERF_ENABLE
|
`ifdef PERF_ENABLE
|
||||||
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
|
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_stalls_per_cycle_r;
|
||||||
|
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_valids_per_cycle_r;
|
||||||
|
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_fires_per_cycle_r;
|
||||||
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_stalls_per_cycle;
|
||||||
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_valids_per_cycle;
|
||||||
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_fires_per_cycle;
|
||||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
||||||
|
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_valids_per_cycle;
|
||||||
|
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_fires_per_cycle;
|
||||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||||
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_valids_r;
|
||||||
|
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_fires_r;
|
||||||
|
|
||||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
perf_issue_unit_stalls_per_cycle[i] = '0;
|
perf_issue_unit_stalls_per_cycle[i] = '0;
|
||||||
|
perf_issue_unit_valids_per_cycle[i] = '0;
|
||||||
|
perf_issue_unit_fires_per_cycle[i] = '0;
|
||||||
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
||||||
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||||
end
|
end
|
||||||
|
if (operands_if[i].valid) begin
|
||||||
|
perf_issue_unit_valids_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||||
|
end
|
||||||
|
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||||
|
perf_issue_unit_fires_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_reduce #(
|
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||||
.DATAW_IN (`NUM_EX_UNITS),
|
always @(*) begin
|
||||||
.N (`ISSUE_WIDTH),
|
perf_unit_stalls_per_cycle[i] = '0;
|
||||||
.OP ("|")
|
perf_unit_valids_per_cycle[i] = '0;
|
||||||
) reduce (
|
perf_unit_fires_per_cycle[i] = '0;
|
||||||
.data_in (perf_issue_unit_stalls_per_cycle),
|
for (integer isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
|
||||||
.data_out (perf_unit_stalls_per_cycle)
|
perf_unit_stalls_per_cycle[i] = perf_unit_stalls_per_cycle[i] + perf_issue_unit_stalls_per_cycle[isw][i];
|
||||||
);
|
perf_unit_valids_per_cycle[i] = perf_unit_valids_per_cycle[i] + perf_issue_unit_valids_per_cycle[isw][i];
|
||||||
|
perf_unit_fires_per_cycle[i] = perf_unit_fires_per_cycle[i] + perf_issue_unit_fires_per_cycle[isw][i];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// VX_reduce #(
|
||||||
|
// .DATAW_IN (`NUM_EX_UNITS),
|
||||||
|
// .N (`ISSUE_WIDTH),
|
||||||
|
// .OP ("|")
|
||||||
|
// ) reduce (
|
||||||
|
// .data_in (perf_issue_unit_stalls_per_cycle),
|
||||||
|
// .data_out (perf_unit_stalls_per_cycle)
|
||||||
|
// );
|
||||||
|
|
||||||
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
|
`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
|
||||||
|
`BUFFER(perf_unit_valids_per_cycle_r, perf_unit_valids_per_cycle);
|
||||||
|
`BUFFER(perf_unit_fires_per_cycle_r, perf_unit_fires_per_cycle);
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
perf_stalls_r[i] <= '0;
|
perf_stalls_r[i] <= '0;
|
||||||
|
perf_valids_r[i] <= '0;
|
||||||
|
perf_fires_r[i] <= '0;
|
||||||
end else begin
|
end else begin
|
||||||
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
|
perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
|
||||||
|
perf_valids_r[i] <= perf_valids_r[i] + `PERF_CTR_BITS'(perf_unit_valids_per_cycle_r[i]);
|
||||||
|
perf_fires_r[i] <= perf_fires_r[i] + `PERF_CTR_BITS'(perf_unit_fires_per_cycle_r[i]);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user