tensor: Write release logic for hgmma
Upon completion of an op, tensor_core_hopper sends a "ghost" commit signal down the pipeline with both the `wb` and `tensor` bits set in commit_if. The scoreboard receives this signal via writeback_if and resets the inuse_tensor status bit to zero, which unblocks the HGMMA_WAIT instruction.
This commit is contained in:
@@ -146,7 +146,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
// have an explicit destination register, use a separate status bit.
|
||||
reg [`UP(ISSUE_RATIO)-1:0] inuse_tensor;
|
||||
|
||||
wire hgmma_start = (ibuffer_if[i].data.ex_type == `EX_TENSOR) &&
|
||||
wire hgmma_start = (ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
|
||||
(ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA);
|
||||
|
||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||
@@ -213,7 +213,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
`ifdef EXT_T_HOPPER
|
||||
wire hgmma_wait = ibuffer_if[i].valid &&
|
||||
(ibuffer_if[i].data.ex_type == `EX_TENSOR) &&
|
||||
(ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
|
||||
(ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT);
|
||||
wire hgmma_ready = ~(hgmma_wait && inuse_tensor[ibuffer_if[i].data.wis]);
|
||||
wire operands_ready = (~(| operands_busy)) && hgmma_ready;
|
||||
@@ -250,6 +250,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
|
||||
end
|
||||
`ifdef EXT_T_HOPPER
|
||||
if (writeback_fire && writeback_if[i].data.tensor) begin
|
||||
inuse_tensor[ibuffer_if[i].data.wis] <= 1'b0;
|
||||
end
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && hgmma_start) begin
|
||||
inuse_tensor[ibuffer_if[i].data.wis] <= 1'b1;
|
||||
end
|
||||
|
||||
@@ -11,6 +11,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
localparam NUM_LANES = `NUM_THREADS;
|
||||
localparam METADATA_QUEUE_DEPTH = 16; // FIXME: arbitrary
|
||||
|
||||
/* commit_if.data_t parts that we need to keep around:
|
||||
@@ -21,22 +22,17 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
- wb
|
||||
- rd
|
||||
*/
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS;
|
||||
|
||||
wire operand_enq_fire = execute_if.valid && execute_if.ready;
|
||||
wire commit_if_fire = commit_if.valid && commit_if.ready;
|
||||
wire [DATAW-1:0] execute_if_data_enq = {
|
||||
execute_if.data.uuid,
|
||||
execute_if.data.wid,
|
||||
execute_if.data.tmask,
|
||||
execute_if.data.PC,
|
||||
execute_if.data.wb,
|
||||
execute_if.data.rd
|
||||
// pid/sop/eop set later
|
||||
};
|
||||
|
||||
wire [`NUM_WARPS-1:0][DATAW-1:0] execute_if_data_deq;
|
||||
wire [`NUM_WARPS-1:0][`UUID_WIDTH-1:0] execute_if_data_uuid;
|
||||
wire [`NUM_WARPS-1:0][`NW_WIDTH-1:0] execute_if_data_wid;
|
||||
wire [`NUM_WARPS-1:0][NUM_LANES-1:0] execute_if_data_tmask;
|
||||
wire [`NUM_WARPS-1:0][`XLEN-1:0] execute_if_data_PC;
|
||||
wire [`NUM_WARPS-1:0] execute_if_data_wb;
|
||||
wire [`NUM_WARPS-1:0][`NR_BITS-1:0] execute_if_data_rd;
|
||||
logic [DATAW-1:0] execute_if_data_new_rd;
|
||||
|
||||
wire [`NUM_WARPS-1:0] metadata_queue_fulls;
|
||||
@@ -71,8 +67,12 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
.reset(reset),
|
||||
.push(enq),
|
||||
.pop(deq),
|
||||
.data_in(execute_if_data_enq),
|
||||
.data_out(execute_if_data_deq[i]),
|
||||
.data_in({execute_if.data.uuid, execute_if.data.wid,
|
||||
execute_if.data.tmask, execute_if.data.PC,
|
||||
execute_if.data.wb, execute_if.data.rd}),
|
||||
.data_out({execute_if_data_uuid[i], execute_if_data_wid[i],
|
||||
execute_if_data_tmask[i], execute_if_data_PC[i],
|
||||
execute_if_data_wb[i], execute_if_data_rd[i]}),
|
||||
.empty(metadata_queue_emptys[i]),
|
||||
`UNUSED_PIN(alm_empty),
|
||||
.full(metadata_queue_fulls[i]),
|
||||
@@ -108,11 +108,6 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
if ((state != STATE_IDLE) && (state_n == STATE_IDLE)) begin
|
||||
metadata_deq = 1'b1;
|
||||
end
|
||||
|
||||
// change rd of the commit data according to state
|
||||
execute_if_data_new_rd =
|
||||
{execute_if_data_deq[0/*FIXME*/][DATAW-1:`NR_BITS],
|
||||
(`NR_BITS'(`NUM_IREGS) + `NR_BITS'(state))};
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -128,19 +123,18 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
||||
|
||||
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1 + 1;
|
||||
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||
// write-back to the correct rd only when eop
|
||||
((state == 2'b11) ? execute_if_data_deq[0/*FIXME*/] : execute_if_data_new_rd), /* uuid ~ rd */
|
||||
wb_data, /* data */
|
||||
1'b0, /* tensor */
|
||||
1'b0, /* pid */
|
||||
1'b1, /* sop */
|
||||
(state == 2'b11) /* eop */
|
||||
// 1'b1 /* eop */
|
||||
};
|
||||
|
||||
assign commit_if.data = commit_if_data;
|
||||
assign commit_if.data.uuid = execute_if_data_uuid[0];
|
||||
assign commit_if.data.wid = execute_if_data_wid[0];
|
||||
assign commit_if.data.tmask = execute_if_data_tmask[0];
|
||||
assign commit_if.data.PC = execute_if_data_PC[0];
|
||||
assign commit_if.data.wb = (state == 2'b11);
|
||||
// custom rd
|
||||
assign commit_if.data.rd = (`NR_BITS'(`NUM_IREGS) + `NR_BITS'(state));
|
||||
assign commit_if.data.data = wb_data;
|
||||
assign commit_if.data.tensor = (state == 2'b11);
|
||||
assign commit_if.data.pid = 1'b0;
|
||||
assign commit_if.data.sop = 1'b1;
|
||||
assign commit_if.data.eop = (state == 2'b11);
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
||||
Reference in New Issue
Block a user