tensor: Block both HGMMA/HGMMA_WAIT at scoreboard

If we let back-to-back HGMMAs pass the scoreboard, we can't accurately track the busy state of the tensor core, and therefore can't block HGMMA_WAITs correctly. TODO: Distinguish "ready-to-fire" from "ready-to-use-writeback".
2024-10-22 21:10:55 -07:00
parent 83979c3341
commit 98eb7cb594
2 changed files with 7 additions and 5 deletions
--- a/hw/rtl/core/VX_scoreboard.sv
+++ b/hw/rtl/core/VX_scoreboard.sv
@@ -209,13 +209,15 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
        assign perf_issue_fires_per_cycle[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
    `endif
        // NOTE(hansung): why is inuse_rd checked? to prevent WAW?
        wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
    `ifdef EXT_T_HOPPER
        wire hgmma_wait = ibuffer_if[i].valid &&
            (ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
            (ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT);
-        wire hgmma_ready = ~(hgmma_wait && inuse_tensor[ibuffer_if[i].data.wis]);
+        // block both HGMMA and HGMMA_WAIT until inuse goes down.  If we pass
        // HGMMA through, we can't accurately keep track of the busy state of
        // the tensor core and block WAITs accordingly.
        wire hgmma_ready = !inuse_tensor[ibuffer_if[i].data.wis];
        wire operands_ready = (~(| operands_busy)) && hgmma_ready;
    `else
        wire operands_ready = ~(| operands_busy);
--- a/hw/rtl/core/VX_tensor_hopper_core.sv
+++ b/hw/rtl/core/VX_tensor_hopper_core.sv
@@ -80,8 +80,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
        );
    end
-    // this shouldn't really happen unless there's a big contention over
+    // NOTE: this is not an error but tells us if backend doesn't keep up with
-    // the commit stage
+    // HGMMA calls from the kernel
    `RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
    wire initiate_ready;
@@ -222,7 +222,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
            commit_if.data.PC     = execute_if_data_PC[0];
            commit_if.data.wb     = execute_if_data_wb[0];
            commit_if.data.rd     = execute_if_data_rd[0];
-            commit_if.data.data   = '0; // FIXME ?
+            commit_if.data.data   = '0; // can be arbitrary as rd is zero
            commit_if.data.tensor = 1'b0;
            commit_if.data.pid    = 1'b0;
            commit_if.data.sop    = 1'b1;