minor updates
This commit is contained in:
@@ -52,13 +52,7 @@ module VX_commit #(
|
||||
assign commit_tmask3 = gpu_commit_fire ? gpu_commit_if.tmask : 0;
|
||||
|
||||
wire [CMTW-1:0] commit_size;
|
||||
|
||||
VX_countones #(
|
||||
.N(3*`NUM_THREADS)
|
||||
) commit_ctr1 (
|
||||
.valids({commit_tmask3, commit_tmask2, commit_tmask1}),
|
||||
.count (commit_size)
|
||||
);
|
||||
assign commit_size = $countones({commit_tmask3, commit_tmask2, commit_tmask1});
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + CMTW),
|
||||
|
||||
@@ -152,10 +152,10 @@ module VX_issue #(
|
||||
`endif
|
||||
end else begin
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 64'd1;
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 64'd1;
|
||||
end
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + 64'd1;
|
||||
perf_scb_stalls <= perf_scb_stalls + 64'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 64'd1;
|
||||
|
||||
@@ -351,14 +351,10 @@ end
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_lat_per_cycle <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready && dram_rsp_if.valid && dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle;
|
||||
end else if (dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle + 64'd1;
|
||||
end else if (dram_rsp_if.valid && dram_rsp_if.ready) begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle - 64'd1;
|
||||
end
|
||||
end else begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle +
|
||||
64'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
|
||||
2'((dram_rsp_if.valid && dram_rsp_if.ready) && !(dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready))));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -161,12 +161,7 @@ module VX_warp_sched #(
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_WARNINGS_END
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
) barrier_count (
|
||||
.valids(barrier_stall_mask[warp_ctl_if.barrier.id]),
|
||||
.count (active_barrier_count)
|
||||
);
|
||||
assign active_barrier_count = $countones(barrier_stall_mask[warp_ctl_if.barrier.id]);
|
||||
|
||||
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
|
||||
64
hw/rtl/cache/VX_cache.v
vendored
64
hw/rtl/cache/VX_cache.v
vendored
@@ -356,69 +356,25 @@ module VX_cache #(
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_reads_count (
|
||||
.valids (core_req_valid & core_req_ready & ~core_req_rw),
|
||||
.count (perf_core_reads_per_cycle)
|
||||
);
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_writes_count (
|
||||
.valids (core_req_valid & core_req_ready & core_req_rw),
|
||||
.count (perf_core_writes_per_cycle)
|
||||
);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & {NUM_REQS{!core_rsp_ready}}),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}});
|
||||
end else begin
|
||||
VX_countones #(
|
||||
.N(NUM_REQS)
|
||||
) perf_countones_core_rsp_count (
|
||||
.valids (core_rsp_valid & ~core_rsp_ready),
|
||||
.count (perf_crsp_stall_per_cycle)
|
||||
);
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
end
|
||||
|
||||
// per cycle: read misses, write misses, mshr stalls, pipeline stalls
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_read_miss_count (
|
||||
.valids (perf_read_miss_per_bank),
|
||||
.count (perf_read_miss_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_write_miss_count (
|
||||
.valids (perf_write_miss_per_bank),
|
||||
.count (perf_write_miss_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_mshr_stall_count (
|
||||
.valids (perf_mshr_stall_per_bank),
|
||||
.count (perf_mshr_stall_per_cycle)
|
||||
);
|
||||
|
||||
VX_countones #(
|
||||
.N(NUM_BANKS)
|
||||
) perf_countones_total_stall_count (
|
||||
.valids (perf_pipe_stall_per_bank),
|
||||
.count (perf_pipe_stall_per_cycle)
|
||||
);
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
|
||||
assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank);
|
||||
assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank);
|
||||
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
|
||||
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
|
||||
|
||||
reg [63:0] perf_core_reads;
|
||||
reg [63:0] perf_core_writes;
|
||||
|
||||
22
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
22
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
@@ -50,7 +50,7 @@ module VX_cache_core_req_bank_sel #(
|
||||
reg [NUM_BANKS-1:0] per_bank_core_req_stall;
|
||||
|
||||
reg [NUM_REQS-1:0] core_req_ready_r;
|
||||
reg [NUM_BANKS-1:0] core_req_sel_r;
|
||||
reg [NUM_REQS-1:0] core_req_sel_r;
|
||||
wire [NUM_REQS-1:0][`BANK_SELECT_BITS-1:0] core_req_bid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
@@ -80,26 +80,34 @@ module VX_cache_core_req_bank_sel #(
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_req_ready_r = 0;
|
||||
core_req_sel_r = 0;
|
||||
|
||||
core_req_ready_r = 0;
|
||||
for (integer j = 0; j < NUM_BANKS; ++j) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin
|
||||
core_req_ready_r[i] = ~per_bank_core_req_stall[j];
|
||||
core_req_sel_r[i] = 1;
|
||||
core_req_ready_r[i] = ~per_bank_core_req_stall[j];
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_req_sel_r = 0;
|
||||
for (integer j = 0; j < NUM_BANKS; ++j) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin
|
||||
core_req_sel_r[i] = ~per_bank_core_req_stall[j];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] bank_stalls_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_valid & ~core_req_sel_r));
|
||||
bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_sel_r & ~core_req_ready_r));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_countones: combinational population count.
// Drives `count` with the number of bits set to 1 in `valids`.
//
// Parameters:
//   N      - width of the `valids` input vector (default 10).
//   N_BITS - width of `count`; defaults to $clog2(N+1).
// Ports:
//   valids - N-bit input vector whose set bits are counted.
//   count  - N_BITS-bit result of $countones(valids).
module VX_countones #(
|
||||
// Number of input bits to examine.
parameter N = 10,
|
||||
// Output width, derived from N unless overridden by the instantiator.
parameter N_BITS = $clog2(N+1)
|
||||
) (
|
||||
input wire [N-1:0] valids,
|
||||
output wire [N_BITS-1:0] count
|
||||
);
|
||||
// The block comment below holds a disabled loop-based implementation that
// walks `valids` from bit N-1 down to 0 and adds 1 to an accumulator for
// every set bit. It is not compiled.
/*reg [N_BITS-1:0] count_r;
|
||||
|
||||
always @(*) begin
|
||||
count_r = 0;
|
||||
for (integer i = N-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
count_r = count_r + N_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign count = count_r;*/
|
||||
|
||||
// Active implementation: use the built-in $countones system function.
assign count = $countones(valids);
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user