Merge branch 'rtl' of https://github.com/hansungk/vortex-private into rtl
This commit is contained in:
@@ -264,7 +264,7 @@
|
|||||||
|
|
||||||
// Number of SFU lanes
|
// Number of SFU lanes
|
||||||
`ifndef NUM_SFU_LANES
|
`ifndef NUM_SFU_LANES
|
||||||
`define NUM_SFU_LANES `NUM_THREADS
|
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Size of Instruction Buffer
|
// Size of Instruction Buffer
|
||||||
|
|||||||
@@ -434,6 +434,8 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
$itor(pipeline_perf_if.scb_any_fire_cycles) / $itor(cycles) * 100.0);
|
$itor(pipeline_perf_if.scb_any_fire_cycles) / $itor(cycles) * 100.0);
|
||||||
$display("issue scoreboard: stalls total:\t%d across ISSUE_WIDTH=%d",
|
$display("issue scoreboard: stalls total:\t%d across ISSUE_WIDTH=%d",
|
||||||
pipeline_perf_if.scb_stalls, `ISSUE_WIDTH);
|
pipeline_perf_if.scb_stalls, `ISSUE_WIDTH);
|
||||||
|
$display("issue scoreboard: stalls by operand hazard: total %d across ISSUE_WIDTH=%d",
|
||||||
|
pipeline_perf_if.scb_any_unit_uses, `ISSUE_WIDTH);
|
||||||
$display("issue scoreboard: stalls by operand hazard: alu %d (%2.2f cycles per issue)",
|
$display("issue scoreboard: stalls by operand hazard: alu %d (%2.2f cycles per issue)",
|
||||||
scrb_alu_per_core,
|
scrb_alu_per_core,
|
||||||
$itor(scrb_alu_per_core) / $itor(pipeline_perf_if.dispatch_fires[`EX_ALU]));
|
$itor(scrb_alu_per_core) / $itor(pipeline_perf_if.dispatch_fires[`EX_ALU]));
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ module VX_issue #(
|
|||||||
.reset (scoreboard_reset),
|
.reset (scoreboard_reset),
|
||||||
`ifdef PERF_ENABLE
|
`ifdef PERF_ENABLE
|
||||||
.perf_scb_stalls(perf_issue_if.scb_stalls),
|
.perf_scb_stalls(perf_issue_if.scb_stalls),
|
||||||
|
.perf_scb_any_unit_uses(perf_issue_if.scb_any_unit_uses),
|
||||||
.perf_scb_fires (perf_issue_if.scb_fires),
|
.perf_scb_fires (perf_issue_if.scb_fires),
|
||||||
.perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles),
|
.perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles),
|
||||||
.perf_units_uses(perf_issue_if.units_uses),
|
.perf_units_uses(perf_issue_if.units_uses),
|
||||||
@@ -71,8 +72,13 @@ module VX_issue #(
|
|||||||
.scoreboard_if (scoreboard_if)
|
.scoreboard_if (scoreboard_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
`ifdef GPR_DUPLICATED
|
||||||
|
VX_operands_dup #(
|
||||||
|
`else
|
||||||
VX_operands #(
|
VX_operands #(
|
||||||
.CORE_ID (CORE_ID)
|
`endif
|
||||||
|
.CORE_ID (CORE_ID),
|
||||||
|
.CACHE_ENABLE (0)
|
||||||
) operands (
|
) operands (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (operands_reset),
|
.reset (operands_reset),
|
||||||
|
|||||||
@@ -13,6 +13,8 @@
|
|||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef GPR_DUPLICATED
|
||||||
|
|
||||||
module VX_operands import VX_gpu_pkg::*; #(
|
module VX_operands import VX_gpu_pkg::*; #(
|
||||||
parameter CORE_ID = 0,
|
parameter CORE_ID = 0,
|
||||||
parameter CACHE_ENABLE = 0
|
parameter CACHE_ENABLE = 0
|
||||||
@@ -197,9 +199,10 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
|
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
|
||||||
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
|
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
|
||||||
|
|
||||||
// NOTE(hansung): toggle_buffer is 1-reg pipe without flow, halving
|
// NOTE(hansung): Cannot use stream_buffer here for full throughput
|
||||||
// throughput. Wouldn't this cap overall IPC? Or OK as long as
|
// because data registers (rs1_data, ...) are single-buffered. This
|
||||||
// ISSUE_WIDTH > 1?
|
// will probably cap IPC at 50% (notwithstanding the 1-operand-per-cycle
|
||||||
|
// limit).
|
||||||
VX_toggle_buffer #(
|
VX_toggle_buffer #(
|
||||||
.DATAW (DATAW)
|
.DATAW (DATAW)
|
||||||
) staging_buffer (
|
) staging_buffer (
|
||||||
@@ -295,3 +298,5 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
`endif
|
||||||
|
|||||||
230
hw/rtl/core/VX_operands_dup.sv
Normal file
230
hw/rtl/core/VX_operands_dup.sv
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
// Copyright © 2019-2023
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifdef GPR_DUPLICATED
|
||||||
|
|
||||||
|
module VX_operands_dup import VX_gpu_pkg::*; #(
|
||||||
|
parameter CORE_ID = 0,
|
||||||
|
parameter CACHE_ENABLE = 0
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
|
||||||
|
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||||
|
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||||
|
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||||
|
);
|
||||||
|
`UNUSED_PARAM (CORE_ID)
|
||||||
|
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||||
|
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||||
|
// NOTE(hansung): VX_operands stages via toggle_buffer, a 1-reg pipe without
|
||||||
|
// flow that halves throughput (capping overall IPC unless ISSUE_WIDTH > 1?).
|
||||||
|
// This variant instantiates VX_stream_buffer for staging instead.
|
||||||
|
VX_stream_buffer #(
|
||||||
|
.DATAW (DATAW)
|
||||||
|
) staging_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (scoreboard_if[i].valid),
|
||||||
|
.data_in ({
|
||||||
|
scoreboard_if[i].data.uuid,
|
||||||
|
scoreboard_if[i].data.wis,
|
||||||
|
scoreboard_if[i].data.tmask,
|
||||||
|
scoreboard_if[i].data.PC,
|
||||||
|
scoreboard_if[i].data.wb,
|
||||||
|
scoreboard_if[i].data.ex_type,
|
||||||
|
scoreboard_if[i].data.op_type,
|
||||||
|
scoreboard_if[i].data.op_mod,
|
||||||
|
scoreboard_if[i].data.use_PC,
|
||||||
|
scoreboard_if[i].data.use_imm,
|
||||||
|
scoreboard_if[i].data.imm,
|
||||||
|
scoreboard_if[i].data.rd
|
||||||
|
}),
|
||||||
|
.ready_in (scoreboard_if[i].ready),
|
||||||
|
.valid_out (operands_if[i].valid),
|
||||||
|
.data_out ({
|
||||||
|
operands_if[i].data.uuid,
|
||||||
|
operands_if[i].data.wis,
|
||||||
|
operands_if[i].data.tmask,
|
||||||
|
operands_if[i].data.PC,
|
||||||
|
operands_if[i].data.wb,
|
||||||
|
operands_if[i].data.ex_type,
|
||||||
|
operands_if[i].data.op_type,
|
||||||
|
operands_if[i].data.op_mod,
|
||||||
|
operands_if[i].data.use_PC,
|
||||||
|
operands_if[i].data.use_imm,
|
||||||
|
operands_if[i].data.imm,
|
||||||
|
operands_if[i].data.rd
|
||||||
|
}),
|
||||||
|
.ready_out (operands_if[i].ready)
|
||||||
|
);
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||||
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||||
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
|
||||||
|
|
||||||
|
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||||
|
VX_stream_buffer #(
|
||||||
|
.DATAW (`XLEN + `XLEN + `XLEN)
|
||||||
|
) staging_data_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (scoreboard_if[i].valid),
|
||||||
|
.data_in ({
|
||||||
|
rs1_data[j], rs2_data[j], rs3_data[j]
|
||||||
|
}),
|
||||||
|
`UNUSED_PIN (ready_in),
|
||||||
|
`UNUSED_PIN (valid_out),
|
||||||
|
.data_out ({
|
||||||
|
operands_if[i].data.rs1_data[j],
|
||||||
|
operands_if[i].data.rs2_data[j],
|
||||||
|
operands_if[i].data.rs3_data[j]
|
||||||
|
}),
|
||||||
|
.ready_out (operands_if[i].ready)
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
// GPR banks
|
||||||
|
|
||||||
|
wire [RAM_ADDRW-1:0] gpr_rd_addr_rs1;
|
||||||
|
wire [RAM_ADDRW-1:0] gpr_rd_addr_rs2;
|
||||||
|
wire [RAM_ADDRW-1:0] gpr_rd_addr_rs3;
|
||||||
|
wire [RAM_ADDRW-1:0] gpr_wr_addr;
|
||||||
|
if (ISSUE_WIS != 0) begin
|
||||||
|
assign gpr_wr_addr = {writeback_if[i].data.wis, writeback_if[i].data.rd};
|
||||||
|
assign gpr_rd_addr_rs1 = {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs1};
|
||||||
|
assign gpr_rd_addr_rs2 = {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs2};
|
||||||
|
assign gpr_rd_addr_rs3 = {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs3};
|
||||||
|
// always @(posedge clk) begin
|
||||||
|
// if (reset) begin
|
||||||
|
// gpr_rd_addr_rs1 <= '0;
|
||||||
|
// gpr_rd_addr_rs2 <= '0;
|
||||||
|
// gpr_rd_addr_rs3 <= '0;
|
||||||
|
// end else begin
|
||||||
|
// // if (!(operands_if[i].valid && !operands_if[i].ready)) begin
|
||||||
|
// if (scoreboard_if[i].valid && scoreboard_if[i].ready) begin
|
||||||
|
// gpr_rd_addr_rs1 <= {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs1};
|
||||||
|
// gpr_rd_addr_rs2 <= {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs2};
|
||||||
|
// gpr_rd_addr_rs3 <= {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs3};
|
||||||
|
// end
|
||||||
|
// end
|
||||||
|
// end
|
||||||
|
end else begin
|
||||||
|
assign gpr_wr_addr = writeback_if[i].data.rd;
|
||||||
|
assign gpr_rd_addr_rs1 = scoreboard_if[i].data.rs1;
|
||||||
|
assign gpr_rd_addr_rs2 = scoreboard_if[i].data.rs2;
|
||||||
|
assign gpr_rd_addr_rs3 = scoreboard_if[i].data.rs3;
|
||||||
|
// always @(posedge clk) begin
|
||||||
|
// if (reset) begin
|
||||||
|
// gpr_rd_addr_rs1 <= '0;
|
||||||
|
// gpr_rd_addr_rs2 <= '0;
|
||||||
|
// gpr_rd_addr_rs3 <= '0;
|
||||||
|
// end else begin
|
||||||
|
// // if (!(operands_if[i].valid && !operands_if[i].ready)) begin
|
||||||
|
// if (scoreboard_if[i].valid && scoreboard_if[i].ready) begin
|
||||||
|
// gpr_rd_addr_rs1 <= scoreboard_if[i].data.rs1;
|
||||||
|
// gpr_rd_addr_rs2 <= scoreboard_if[i].data.rs2;
|
||||||
|
// gpr_rd_addr_rs3 <= scoreboard_if[i].data.rs3;
|
||||||
|
// end
|
||||||
|
// end
|
||||||
|
// end
|
||||||
|
end
|
||||||
|
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
reg wr_enabled = 0;
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
wr_enabled <= 1;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
|
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW (`XLEN),
|
||||||
|
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.INIT_ENABLE (1),
|
||||||
|
.INIT_VALUE (0),
|
||||||
|
`endif
|
||||||
|
.NO_RWCHECK (1)
|
||||||
|
) gpr_ram_rs1 (
|
||||||
|
.clk (clk),
|
||||||
|
.read (1'b1),
|
||||||
|
`UNUSED_PIN (wren),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`else
|
||||||
|
.write (writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`endif
|
||||||
|
.waddr (gpr_wr_addr),
|
||||||
|
.wdata (writeback_if[i].data.data[j]),
|
||||||
|
.raddr (gpr_rd_addr_rs1),
|
||||||
|
.rdata (rs1_data[j])
|
||||||
|
);
|
||||||
|
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW (`XLEN),
|
||||||
|
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.INIT_ENABLE (1),
|
||||||
|
.INIT_VALUE (0),
|
||||||
|
`endif
|
||||||
|
.NO_RWCHECK (1)
|
||||||
|
) gpr_ram_rs2(
|
||||||
|
.clk (clk),
|
||||||
|
.read (1'b1),
|
||||||
|
`UNUSED_PIN (wren),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`else
|
||||||
|
.write (writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`endif
|
||||||
|
.waddr (gpr_wr_addr),
|
||||||
|
.wdata (writeback_if[i].data.data[j]),
|
||||||
|
.raddr (gpr_rd_addr_rs2),
|
||||||
|
.rdata (rs2_data[j])
|
||||||
|
);
|
||||||
|
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW (`XLEN),
|
||||||
|
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.INIT_ENABLE (1),
|
||||||
|
.INIT_VALUE (0),
|
||||||
|
`endif
|
||||||
|
.NO_RWCHECK (1)
|
||||||
|
) gpr_ram_rs3 (
|
||||||
|
.clk (clk),
|
||||||
|
.read (1'b1),
|
||||||
|
`UNUSED_PIN (wren),
|
||||||
|
`ifdef GPR_RESET
|
||||||
|
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`else
|
||||||
|
.write (writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||||
|
`endif
|
||||||
|
.waddr (gpr_wr_addr),
|
||||||
|
.wdata (writeback_if[i].data.data[j]),
|
||||||
|
.raddr (gpr_rd_addr_rs3),
|
||||||
|
.rdata (rs3_data[j])
|
||||||
|
);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
`endif
|
||||||
@@ -21,6 +21,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
`ifdef PERF_ENABLE
|
`ifdef PERF_ENABLE
|
||||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
|
output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls,
|
||||||
|
output reg [`PERF_CTR_BITS-1:0] perf_scb_any_unit_uses,
|
||||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_fires,
|
output reg [`PERF_CTR_BITS-1:0] perf_scb_fires,
|
||||||
output reg [`PERF_CTR_BITS-1:0] perf_scb_any_fire_cycles,
|
output reg [`PERF_CTR_BITS-1:0] perf_scb_any_fire_cycles,
|
||||||
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
|
output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS],
|
||||||
@@ -45,6 +46,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle;
|
wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle;
|
||||||
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||||
|
reg [`ISSUE_WIDTH-1:0] perf_issue_any_unit_per_cycle;
|
||||||
|
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_any_unit_per_cycle, perf_any_unit_per_cycle_r;
|
||||||
|
|
||||||
wire [`ISSUE_WIDTH-1:0] perf_issue_fires_per_cycle;
|
wire [`ISSUE_WIDTH-1:0] perf_issue_fires_per_cycle;
|
||||||
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_fires_per_cycle, perf_fires_per_cycle_r;
|
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_fires_per_cycle, perf_fires_per_cycle_r;
|
||||||
@@ -53,6 +56,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
reg [`PERF_CTR_BITS-1:0] perf_scb_empty;
|
reg [`PERF_CTR_BITS-1:0] perf_scb_empty;
|
||||||
|
|
||||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||||
|
`POP_COUNT(perf_any_unit_per_cycle, perf_issue_any_unit_per_cycle);
|
||||||
`POP_COUNT(perf_fires_per_cycle, perf_issue_fires_per_cycle);
|
`POP_COUNT(perf_fires_per_cycle, perf_issue_fires_per_cycle);
|
||||||
assign perf_any_fire_per_cycle = |perf_issue_fires_per_cycle;
|
assign perf_any_fire_per_cycle = |perf_issue_fires_per_cycle;
|
||||||
|
|
||||||
@@ -95,6 +99,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
// );
|
// );
|
||||||
|
|
||||||
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
||||||
|
`BUFFER(perf_any_unit_per_cycle_r, perf_any_unit_per_cycle);
|
||||||
`BUFFER(perf_fires_per_cycle_r, perf_fires_per_cycle);
|
`BUFFER(perf_fires_per_cycle_r, perf_fires_per_cycle);
|
||||||
`BUFFER(perf_any_fire_per_cycle_r, perf_any_fire_per_cycle);
|
`BUFFER(perf_any_fire_per_cycle_r, perf_any_fire_per_cycle);
|
||||||
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
|
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
|
||||||
@@ -103,10 +108,12 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
perf_scb_stalls <= '0;
|
perf_scb_stalls <= '0;
|
||||||
|
perf_scb_any_unit_uses <= '0;
|
||||||
perf_scb_fires <= '0;
|
perf_scb_fires <= '0;
|
||||||
perf_scb_any_fire_cycles <= '0;
|
perf_scb_any_fire_cycles <= '0;
|
||||||
end else begin
|
end else begin
|
||||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
||||||
|
perf_scb_any_unit_uses <= perf_scb_any_unit_uses + `PERF_CTR_BITS'(perf_any_unit_per_cycle_r);
|
||||||
perf_scb_fires <= perf_scb_fires + `PERF_CTR_BITS'(perf_fires_per_cycle_r);
|
perf_scb_fires <= perf_scb_fires + `PERF_CTR_BITS'(perf_fires_per_cycle_r);
|
||||||
perf_scb_any_fire_cycles <= perf_scb_any_fire_cycles + `PERF_CTR_BITS'(perf_any_fire_per_cycle_r);
|
perf_scb_any_fire_cycles <= perf_scb_any_fire_cycles + `PERF_CTR_BITS'(perf_any_fire_per_cycle_r);
|
||||||
end
|
end
|
||||||
@@ -159,27 +166,32 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
perf_issue_units_per_cycle[i] = '0;
|
perf_issue_units_per_cycle[i] = '0;
|
||||||
|
perf_issue_any_unit_per_cycle[i] = '0;
|
||||||
perf_issue_sfu_per_cycle[i] = '0;
|
perf_issue_sfu_per_cycle[i] = '0;
|
||||||
if (ibuffer_if[i].valid) begin
|
if (ibuffer_if[i].valid) begin
|
||||||
if (inuse_rd) begin
|
if (inuse_rd) begin
|
||||||
|
perf_issue_any_unit_per_cycle[i] = '1;
|
||||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin
|
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin
|
||||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (inuse_rs1) begin
|
if (inuse_rs1) begin
|
||||||
|
perf_issue_any_unit_per_cycle[i] = '1;
|
||||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
||||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (inuse_rs2) begin
|
if (inuse_rs2) begin
|
||||||
|
perf_issue_any_unit_per_cycle[i] = '1;
|
||||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
||||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (inuse_rs3) begin
|
if (inuse_rs3) begin
|
||||||
|
perf_issue_any_unit_per_cycle[i] = '1;
|
||||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
||||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ interface VX_pipeline_perf_if ();
|
|||||||
wire [`PERF_CTR_BITS-1:0] sched_barrier_idles;
|
wire [`PERF_CTR_BITS-1:0] sched_barrier_idles;
|
||||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||||
|
wire [`PERF_CTR_BITS-1:0] scb_any_unit_uses;
|
||||||
wire [`PERF_CTR_BITS-1:0] scb_fires;
|
wire [`PERF_CTR_BITS-1:0] scb_fires;
|
||||||
wire [`PERF_CTR_BITS-1:0] scb_any_fire_cycles;
|
wire [`PERF_CTR_BITS-1:0] scb_any_fire_cycles;
|
||||||
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
||||||
@@ -43,6 +44,7 @@ interface VX_pipeline_perf_if ();
|
|||||||
modport issue (
|
modport issue (
|
||||||
output ibf_stalls,
|
output ibf_stalls,
|
||||||
output scb_stalls,
|
output scb_stalls,
|
||||||
|
output scb_any_unit_uses,
|
||||||
output scb_fires,
|
output scb_fires,
|
||||||
output scb_any_fire_cycles,
|
output scb_any_fire_cycles,
|
||||||
output units_uses,
|
output units_uses,
|
||||||
@@ -59,6 +61,7 @@ interface VX_pipeline_perf_if ();
|
|||||||
input sched_stalls,
|
input sched_stalls,
|
||||||
input ibf_stalls,
|
input ibf_stalls,
|
||||||
input scb_stalls,
|
input scb_stalls,
|
||||||
|
input scb_any_unit_uses,
|
||||||
input scb_fires,
|
input scb_fires,
|
||||||
input scb_any_fire_cycles,
|
input scb_any_fire_cycles,
|
||||||
input units_uses,
|
input units_uses,
|
||||||
|
|||||||
Reference in New Issue
Block a user