profiling timing optimization

minor update

minor update

minor update
This commit is contained in:
Blaise Tine
2023-12-13 18:04:12 -08:00
parent f5f9e3dfdb
commit c6845a4c8d
11 changed files with 64 additions and 56 deletions

View File

@@ -229,14 +229,16 @@ module VX_shared_mem import VX_gpu_pkg::*; #(
`ifdef PERF_ENABLE
// Per-cycle performance event signals: reads, writes, and response stalls.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so
// several signals below are declared twice (an older and a newer form side by
// side). Confirm against the actual file which declarations are live.
// The matching `endif is outside this view.
// per cycle: reads, writes
// Form that also declares a *_r copy of each per-cycle count.
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle, perf_reads_per_cycle_r;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle, perf_writes_per_cycle_r;
// Form that declares the same per-cycle counts without the *_r copies.
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
// Per-request masks driven directly: a request that is valid and ready counts
// as a read when req_rw is 0 and as a write when req_rw is 1.
wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw;
wire [NUM_REQS-1:0] perf_writes_per_req = req_valid & req_ready & req_rw;
// Same masks declared without a driver; they are driven via `BUFFER below.
wire [NUM_REQS-1:0] perf_reads_per_req, perf_writes_per_req;
// One bit per request: response is valid but not ready.
wire [NUM_REQS-1:0] perf_crsp_stall_per_req = rsp_valid & ~rsp_ready;
// `BUFFER is defined outside this view; presumably it registers its second
// argument into its first (one-cycle delay) — TODO confirm.
`BUFFER(perf_reads_per_req, req_valid & req_ready & ~req_rw);
`BUFFER(perf_writes_per_req, req_valid & req_ready & req_rw);
// `POP_COUNT is defined outside this view; presumably it writes the number of
// set bits of the second argument into the first — TODO confirm.
`POP_COUNT(perf_reads_per_cycle, perf_reads_per_req);
`POP_COUNT(perf_writes_per_cycle, perf_writes_per_req);
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
@@ -245,17 +247,14 @@ module VX_shared_mem import VX_gpu_pkg::*; #(
// Running performance counters, each `PERF_CTR_BITS wide.
// The declaration of perf_reads is not visible in this view.
reg [`PERF_CTR_BITS-1:0] perf_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
// NOTE(review): these two `BUFFER lines and the *_r assignments further down
// appear to be the older side of a diff rendered without +/- markers; the
// assignments that use the un-suffixed per-cycle counts look like the newer
// side. Confirm against the actual file.
`BUFFER(perf_reads_per_cycle_r, perf_reads_per_cycle);
`BUFFER(perf_writes_per_cycle_r, perf_writes_per_cycle);
// On each rising clock edge: clear all three counters while reset is asserted,
// otherwise add this cycle's event counts to the running totals.
always @(posedge clk) begin
if (reset) begin
perf_reads <= '0;
perf_writes <= '0;
perf_crsp_stalls <= '0;
end else begin
// Accumulate using the *_r copies of the per-cycle counts, cast to counter width.
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle_r);
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle_r);
// Accumulate using the un-suffixed per-cycle counts. If both pairs were real
// code, these later nonblocking assignments would be the ones that take effect.
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle);
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end