Implement WU architecture support
This commit is contained in:
@@ -15,7 +15,8 @@
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter CORE_ID = 0,
|
||||
parameter DOMAIN = WU_DOMAIN_SCALAR
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -36,11 +37,15 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
`ifdef EXT_T_ENABLE
|
||||
VX_dispatch_if.master tensor_alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master tensor_lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master tensor_ctrl_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master tensor_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_PARAM (DOMAIN)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
@@ -68,8 +73,29 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
|
||||
VX_operands_if alu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] operands_wid;
|
||||
wire [`ISSUE_WIDTH-1:0] operands_is_tensor;
|
||||
wire [`ISSUE_WIDTH-1:0] tensor_alu_allowed;
|
||||
wire [`ISSUE_WIDTH-1:0] tensor_ctrl_allowed;
|
||||
wire [`ISSUE_WIDTH-1:0] tensor_wctl_allowed;
|
||||
wire [`ISSUE_WIDTH-1:0] tensor_sfu_allowed;
|
||||
|
||||
// Per-issue-slot classification of the incoming operand packet.
// For every issue slot this loop derives the warp id, whether that warp is
// treated as a tensor warp, and which ALU/SFU operations are permitted when
// the packet is routed to the tensor-side dispatch buffers.
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// NOTE(review): alu_operands_if[i].valid is assigned again in the following
// generate loop with an additional !operands_is_tensor[i] term. In this diff
// rendering the line below is presumably the removed pre-change version;
// confirm it is not present in the final file (it would double-drive the net).
assign alu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_ALU);
|
||||
// Warp id computed by wis_to_wid() from the packet's wis field and the slot index.
assign operands_wid[i] = wis_to_wid(operands_if[i].data.wis, ISSUE_ISW_W'(i));
|
||||
// A warp counts as "tensor" when its id is at or above `NUM_SCALAR_WARPS.
assign operands_is_tensor[i] = operands_wid[i] >= `NW_WIDTH'(`NUM_SCALAR_WARPS);
|
||||
// ALU ops are allowed on the tensor path unless op_mod matches
// `INST_ALU_IS_M or `INST_ALU_IS_RED. The ex_type check is NOT part of this
// flag; consumers AND it with (ex_type == `EX_ALU) themselves.
assign tensor_alu_allowed[i] = !`INST_ALU_IS_M(operands_if[i].data.op_mod)
|
||||
&& !`INST_ALU_IS_RED(operands_if[i].data.op_mod);
|
||||
// SFU op_types accepted by the tensor control path: TMC, CSRRS, BAR, BAR_MASK.
assign tensor_ctrl_allowed[i] = (operands_if[i].data.op_type == `INST_SFU_TMC)
|
||||
|| (operands_if[i].data.op_type == `INST_SFU_CSRRS)
|
||||
|| (operands_if[i].data.op_type == `INST_SFU_BAR)
|
||||
|| (operands_if[i].data.op_type == `INST_SFU_BAR_MASK);
|
||||
// Barrier-only subset (BAR, BAR_MASK).
assign tensor_wctl_allowed[i] = (operands_if[i].data.op_type == `INST_SFU_BAR)
|
||||
|| (operands_if[i].data.op_type == `INST_SFU_BAR_MASK);
|
||||
// Union of the two sets above. As written, tensor_wctl_allowed is a subset of
// tensor_ctrl_allowed, so this currently equals tensor_ctrl_allowed.
assign tensor_sfu_allowed[i] = tensor_ctrl_allowed[i] || tensor_wctl_allowed[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign alu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_ALU) && !operands_is_tensor[i];
|
||||
assign alu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
@@ -90,12 +116,43 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
);
|
||||
end
|
||||
|
||||
// Tensor-warp ALU dispatch path, compiled only when EXT_T_ENABLE is defined.
// One elastic buffer per issue slot carries permitted ALU operations issued by
// tensor warps from operands_if[i] to tensor_alu_dispatch_if[i].
`ifdef EXT_T_ENABLE
|
||||
// Tensor INT/control dispatch
|
||||
|
||||
VX_operands_if tensor_alu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// Valid only for EX_ALU packets from tensor warps (operands_is_tensor[i])
// whose op_mod passes the tensor_alu_allowed[i] filter.
assign tensor_alu_operands_if[i].valid = operands_if[i].valid
|
||||
&& (operands_if[i].data.ex_type == `EX_ALU)
|
||||
&& operands_is_tensor[i]
|
||||
&& tensor_alu_allowed[i];
|
||||
// Payload is forwarded unchanged.
assign tensor_alu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
// Derives tensor_alu_reset from reset via the RESET_RELAY macro
// (macro defined elsewhere; presumably a local/registered copy of reset).
`RESET_RELAY (tensor_alu_reset, reset);
|
||||
|
||||
// Buffer between the operand stage and the tensor ALU dispatch interface.
// data_in is packed by `TO_DISPATCH_DATA together with last_active_tid[i]
// (both defined outside this view); DATAW is the packed width.
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) tensor_alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (tensor_alu_reset),
|
||||
.valid_in (tensor_alu_operands_if[i].valid),
|
||||
.ready_in (tensor_alu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(tensor_alu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (tensor_alu_dispatch_if[i].data),
|
||||
.valid_out (tensor_alu_dispatch_if[i].valid),
|
||||
.ready_out (tensor_alu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// LSU dispatch
|
||||
|
||||
VX_operands_if lsu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU);
|
||||
assign lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU) && !operands_is_tensor[i];
|
||||
assign lsu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
@@ -116,6 +173,34 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
);
|
||||
end
|
||||
|
||||
// Tensor-warp LSU dispatch path, compiled only when EXT_T_ENABLE is defined.
// One elastic buffer per issue slot carries EX_LSU operations issued by tensor
// warps from operands_if[i] to tensor_lsu_dispatch_if[i].
`ifdef EXT_T_ENABLE
|
||||
// Tensor LSU dispatch
|
||||
|
||||
VX_operands_if tensor_lsu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// Valid for every EX_LSU packet from a tensor warp; unlike the tensor ALU
// and control paths, no additional opcode filter is applied here.
assign tensor_lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU) && operands_is_tensor[i];
|
||||
// Payload is forwarded unchanged.
assign tensor_lsu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
// Derives tensor_lsu_reset from reset via the RESET_RELAY macro
// (macro defined elsewhere; presumably a local/registered copy of reset).
`RESET_RELAY (tensor_lsu_reset, reset);
|
||||
|
||||
// Buffer between the operand stage and the tensor LSU dispatch interface.
// data_in is packed by `TO_DISPATCH_DATA together with last_active_tid[i]
// (both defined outside this view).
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) tensor_lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (tensor_lsu_reset),
|
||||
.valid_in (tensor_lsu_operands_if[i].valid),
|
||||
.ready_in (tensor_lsu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(tensor_lsu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (tensor_lsu_dispatch_if[i].data),
|
||||
.valid_out (tensor_lsu_dispatch_if[i].valid),
|
||||
.ready_out (tensor_lsu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// FPU dispatch
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
@@ -123,7 +208,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
VX_operands_if fpu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign fpu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_FPU);
|
||||
assign fpu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_FPU) && !operands_is_tensor[i];
|
||||
assign fpu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
@@ -152,7 +237,9 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
VX_operands_if tensor_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign tensor_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_TENSOR);
|
||||
assign tensor_operands_if[i].valid = operands_if[i].valid
|
||||
&& (operands_if[i].data.ex_type == `EX_TENSOR)
|
||||
&& operands_is_tensor[i];
|
||||
assign tensor_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (tensor_reset, reset);
|
||||
@@ -174,12 +261,45 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
end
|
||||
`endif
|
||||
|
||||
// Tensor-warp control dispatch path, compiled only when EXT_T_ENABLE is defined.
// One elastic buffer per issue slot carries the permitted subset of EX_SFU
// operations issued by tensor warps to tensor_ctrl_dispatch_if[i].
`ifdef EXT_T_ENABLE
|
||||
// Tensor control dispatch
|
||||
|
||||
VX_operands_if tensor_ctrl_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// Valid only for EX_SFU packets from tensor warps whose op_type passes
// tensor_ctrl_allowed[i]. SFU packets from tensor warps that fail the
// filter are not presented to this buffer.
assign tensor_ctrl_operands_if[i].valid = operands_if[i].valid
|
||||
&& (operands_if[i].data.ex_type == `EX_SFU)
|
||||
&& operands_is_tensor[i]
|
||||
&& tensor_ctrl_allowed[i];
|
||||
// Payload is forwarded unchanged.
assign tensor_ctrl_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
// Derives tensor_ctrl_reset from reset via the RESET_RELAY macro
// (macro defined elsewhere; presumably a local/registered copy of reset).
`RESET_RELAY (tensor_ctrl_reset, reset);
|
||||
|
||||
// Buffer between the operand stage and the tensor control dispatch interface.
// data_in is packed by `TO_DISPATCH_DATA together with last_active_tid[i]
// (both defined outside this view).
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) tensor_ctrl_buffer (
|
||||
.clk (clk),
|
||||
.reset (tensor_ctrl_reset),
|
||||
.valid_in (tensor_ctrl_operands_if[i].valid),
|
||||
.ready_in (tensor_ctrl_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(tensor_ctrl_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (tensor_ctrl_dispatch_if[i].data),
|
||||
.valid_out (tensor_ctrl_dispatch_if[i].valid),
|
||||
.ready_out (tensor_ctrl_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// SFU dispatch
|
||||
|
||||
VX_operands_if sfu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign sfu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_SFU);
|
||||
assign sfu_operands_if[i].valid = operands_if[i].valid
|
||||
&& (operands_if[i].data.ex_type == `EX_SFU)
|
||||
&& !operands_is_tensor[i];
|
||||
assign sfu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
@@ -202,17 +322,46 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
|
||||
// can take next request?
|
||||
// Back-pressure to the operand stage: operands_if[i].ready is the OR of the
// ready signals of the per-unit buffers, each qualified by the ex_type (and,
// in the updated version, by the scalar/tensor warp classification) that
// selects that buffer.
// NOTE(review): this span is a diff rendering that interleaves two versions
// of the same assignment. The first, unterminated `assign` (no warp
// qualification), the standalone EXT_T_ENABLE tensor term near the end, and
// the first SFU term appear to be the pre-change lines; the second `assign`
// with the operands_is_tensor[i] qualifiers appears to be the replacement.
// Confirm against the actual file before relying on this text.
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_if[i].ready = (alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU))
|
||||
|| (lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU))
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_FPU))
|
||||
// Updated form: scalar-unit terms require !operands_is_tensor[i]; tensor-unit
// terms require operands_is_tensor[i] plus the same opcode filters used on the
// matching *_operands_if[i].valid assignments. In this form, a tensor-warp
// packet that matches no term (e.g. EX_FPU, or an ALU/SFU op rejected by the
// filters) never sees ready asserted.
assign operands_if[i].ready = (alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU) && !operands_is_tensor[i])
|
||||
|| (lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU) && !operands_is_tensor[i])
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_FPU) && !operands_is_tensor[i])
|
||||
`endif
|
||||
`ifdef EXT_T_ENABLE
|
||||
|| (tensor_alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU) && operands_is_tensor[i] && tensor_alu_allowed[i])
|
||||
|| (tensor_lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU) && operands_is_tensor[i])
|
||||
|| (tensor_ctrl_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU) && operands_is_tensor[i] && tensor_ctrl_allowed[i])
|
||||
|| (tensor_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_TENSOR) && operands_is_tensor[i])
|
||||
`endif
|
||||
`ifdef EXT_T_ENABLE
|
||||
|| (tensor_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_TENSOR))
|
||||
`endif
|
||||
|| (sfu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU));
|
||||
|| (sfu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU) && !operands_is_tensor[i]);
|
||||
end
|
||||
|
||||
// Simulation-only legality checks on the scalar/tensor warp split.
// Each RUNTIME_ASSERT (macro defined elsewhere) is evaluated per issue slot
// and reports the warp id, PC, op_type and uuid of the offending packet.
`ifdef SIMULATION
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// EX_TENSOR packets must come from a tensor warp.
`RUNTIME_ASSERT(
|
||||
!(operands_if[i].valid && (operands_if[i].data.ex_type == `EX_TENSOR)) || operands_is_tensor[i],
|
||||
("%t: *** core%0d-dispatch-illegal-scalar-tensor-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
|
||||
$time, CORE_ID, operands_wid[i], operands_if[i].data.PC, operands_if[i].data.op_type, operands_if[i].data.uuid)
|
||||
)
|
||||
// Tensor warps must never issue EX_FPU packets.
`RUNTIME_ASSERT(
|
||||
!(operands_if[i].valid && operands_is_tensor[i] && (operands_if[i].data.ex_type == `EX_FPU)),
|
||||
("%t: *** core%0d-dispatch-illegal-tensor-fpu-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
|
||||
$time, CORE_ID, operands_wid[i], operands_if[i].data.PC, operands_if[i].data.op_type, operands_if[i].data.uuid)
|
||||
)
|
||||
// Tensor warps may only issue EX_SFU packets whose op_type is in tensor_sfu_allowed.
`RUNTIME_ASSERT(
|
||||
!(operands_if[i].valid && operands_is_tensor[i] && (operands_if[i].data.ex_type == `EX_SFU) && !tensor_sfu_allowed[i]),
|
||||
("%t: *** core%0d-dispatch-illegal-tensor-sfu-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
|
||||
$time, CORE_ID, operands_wid[i], operands_if[i].data.PC, operands_if[i].data.op_type, operands_if[i].data.uuid)
|
||||
)
|
||||
// Tensor warps must not issue EX_ALU packets whose op_mod matches
// `INST_ALU_IS_M or `INST_ALU_IS_RED (the inverse of tensor_alu_allowed).
`RUNTIME_ASSERT(
|
||||
!(operands_if[i].valid && operands_is_tensor[i] && (operands_if[i].data.ex_type == `EX_ALU)
|
||||
&& (`INST_ALU_IS_M(operands_if[i].data.op_mod) || `INST_ALU_IS_RED(operands_if[i].data.op_mod))),
|
||||
("%t: *** core%0d-dispatch-illegal-tensor-complex-alu-op: wid=%0d PC=0x%0h op=0x%0h mod=0x%0h (#%0d)",
|
||||
$time, CORE_ID, operands_wid[i], operands_if[i].data.PC, operands_if[i].data.op_type, operands_if[i].data.op_mod, operands_if[i].data.uuid)
|
||||
)
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_stalls_per_cycle_r;
|
||||
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_valids_per_cycle_r;
|
||||
@@ -309,6 +458,16 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (!reset && ($time > `TRACE_STARTTIME)) begin
|
||||
if ((CORE_ID == 0)
|
||||
&& alu_dispatch_if[i].valid
|
||||
&& ((alu_dispatch_if[i].data.PC == 32'h80000010) || (alu_dispatch_if[i].data.PC == 32'h80000014))) begin
|
||||
`TRACE(1, ("%d: core%0d-alu-dispatch-buffer: isw=%0d, valid=%b, ready=%b, wid=%0d, PC=0x%0h, op=0x%0h, mod=%0d, wb=%0d, rd=%0d (#%0d)\n",
|
||||
$time, CORE_ID, i, alu_dispatch_if[i].valid, alu_dispatch_if[i].ready,
|
||||
wis_to_wid(alu_dispatch_if[i].data.wis, i), alu_dispatch_if[i].data.PC,
|
||||
alu_dispatch_if[i].data.op_type, alu_dispatch_if[i].data.op_mod,
|
||||
alu_dispatch_if[i].data.wb, alu_dispatch_if[i].data.rd,
|
||||
alu_dispatch_if[i].data.uuid));
|
||||
end
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), operands_if[i].data.PC));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
|
||||
Reference in New Issue
Block a user