tensor: Handle warp-id queue backpressure in dpu

2024-05-30 15:25:00 -07:00
parent 2e2decc8b6
commit 2743d32bd2
2 changed files with 4 additions and 6 deletions
--- a/hw/rtl/core/VX_tensor_core.sv
+++ b/hw/rtl/core/VX_tensor_core.sv
@@ -77,7 +77,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
    // octet. E.g. two tgs map lane 0-3 and lane 16-19 -> 16
    // FIXME: not sure this is the right logic.  just filling in what works
    localparam LANE_OFFSET_THREADGROUP = (4 * NUM_OCTETS);
-    localparam REQ_QUEUE_DEPTH = 4;
+    localparam METADATA_QUEUE_DEPTH = 4;

    wire [1:0] step = 2'(execute_if.data.op_type);
    wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
@@ -220,7 +220,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(

        VX_fifo_queue #(
            .DATAW(DATAW),
-            .DEPTH(REQ_QUEUE_DEPTH)
+            .DEPTH(METADATA_QUEUE_DEPTH)
        ) pending_uops (
            .clk(clk),
            .reset(reset),
--- a/hw/rtl/fpu/VX_tensor_dpu.sv
+++ b/hw/rtl/fpu/VX_tensor_dpu.sv
@@ -93,7 +93,7 @@ module VX_tensor_dpu #(
    wire enq = valid_in && ready_in;
    wire deq = valid_out && ready_out;

-    assign ready_in  = &(threadgroup_readys);
+    assign ready_in  = &(threadgroup_readys) && !full;
    assign valid_out = &(threadgroup_valids);

    // need to pass along warp id's to do multithreading
@@ -109,13 +109,11 @@ module VX_tensor_dpu #(
        .data_out  (D_wid),
        .empty     (empty),
        `UNUSED_PIN(alm_empty),
-        .full      (full), // should be impossible to overflow
+        .full      (full),
        `UNUSED_PIN(alm_full),
        `UNUSED_PIN(size)
    );

-    `RUNTIME_ASSERT(reset || !full, ("dpu wid queue is full!"))
-
    // `RUNTIME_ASSERT(reset || (&(threadgroup_valids) == valid_out),
    //                 ("FEDP and metadata queue went out of sync!"))
 endmodule