tensor: Block both HGMMA/HGMMA_WAIT at scoreboard
If we let back-to-back HGMMAs pass at the scoreboard, we can't accurately keep track of the busy state of the tensor core and block WAITs accordingly. TODO: Distinguish "ready-to-fire" from "ready-to-use-writeback".
This commit is contained in:
@@ -209,13 +209,15 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
assign perf_issue_fires_per_cycle[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
assign perf_issue_fires_per_cycle[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// NOTE(hansung): why is inuse_rd checked? to prevent WAW?
|
|
||||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||||
`ifdef EXT_T_HOPPER
|
`ifdef EXT_T_HOPPER
|
||||||
wire hgmma_wait = ibuffer_if[i].valid &&
|
wire hgmma_wait = ibuffer_if[i].valid &&
|
||||||
(ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
|
(ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
|
||||||
(ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT);
|
(ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT);
|
||||||
wire hgmma_ready = ~(hgmma_wait && inuse_tensor[ibuffer_if[i].data.wis]);
|
// block both HGMMA and HGMMA_WAIT until inuse goes down. If we pass
|
||||||
|
// HGMMA through, we can't accurately keep track of the busy state of
|
||||||
|
// the tensor core and block WAITs accordingly.
|
||||||
|
wire hgmma_ready = !inuse_tensor[ibuffer_if[i].data.wis];
|
||||||
wire operands_ready = (~(| operands_busy)) && hgmma_ready;
|
wire operands_ready = (~(| operands_busy)) && hgmma_ready;
|
||||||
`else
|
`else
|
||||||
wire operands_ready = ~(| operands_busy);
|
wire operands_ready = ~(| operands_busy);
|
||||||
|
|||||||
@@ -80,8 +80,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
// this shouldn't really happen unless there's a big contention over
|
// NOTE: this is not an error but tells us if backend doesn't keep up with
|
||||||
// the commit stage
|
// HGMMA calls from the kernel
|
||||||
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
|
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
|
||||||
|
|
||||||
wire initiate_ready;
|
wire initiate_ready;
|
||||||
@@ -222,7 +222,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
commit_if.data.PC = execute_if_data_PC[0];
|
commit_if.data.PC = execute_if_data_PC[0];
|
||||||
commit_if.data.wb = execute_if_data_wb[0];
|
commit_if.data.wb = execute_if_data_wb[0];
|
||||||
commit_if.data.rd = execute_if_data_rd[0];
|
commit_if.data.rd = execute_if_data_rd[0];
|
||||||
commit_if.data.data = '0; // FIXME ?
|
commit_if.data.data = '0; // can be arbitrary as rd is zero
|
||||||
commit_if.data.tensor = 1'b0;
|
commit_if.data.tensor = 1'b0;
|
||||||
commit_if.data.pid = 1'b0;
|
commit_if.data.pid = 1'b0;
|
||||||
commit_if.data.sop = 1'b1;
|
commit_if.data.sop = 1'b1;
|
||||||
|
|||||||
Reference in New Issue
Block a user