Shush display

2024-06-03 13:04:09 -07:00
parent 9caafb2d8a
commit 12f8722dd5
3 changed files with 5 additions and 3 deletions
--- a/hw/rtl/core/VX_tensor_core.sv
+++ b/hw/rtl/core/VX_tensor_core.sv
@@ -81,6 +81,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
    localparam METADATA_QUEUE_DEPTH = 2 * `LATENCY_HMMA;

    wire [1:0] step = 2'(execute_if.data.op_type);
+    // op_mod is reused to carry the instruction's index within its pair (1 = last)
    wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));

    logic [NUM_OCTETS-1:0] octet_results_valid;
@@ -115,7 +116,6 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
        logic result_valid;
        logic result_ready;

-        // op_mod is reused to indicate instruction's id in pair
        VX_tensor_octet #(
            .ISW(ISW),
            .OCTET(i)
--- a/hw/rtl/core/VX_uop_sequencer.sv
+++ b/hw/rtl/core/VX_uop_sequencer.sv
@@ -128,9 +128,8 @@ module VX_uop_sequencer import VX_gpu_pkg::*; (
        if (uop_sequencer_if.valid && use_uop &&
            uop_sequencer_if.data.rd  == `NR_BITS'(1)) begin
            // a little sketchy? but shouldn't create any loop
-            ibuffer_if.data.rd  = ibuffer_if.data.rd  + `NR_BITS'(8);
+            ibuffer_if.data.rd  = ibuffer_if.data.rd  + `NR_BITS'(8); // FIXME: 8 is hardcoded
            ibuffer_if.data.rs3 = ibuffer_if.data.rs3 + `NR_BITS'(8);
-            $display("yoooooooo! uop rd=%d\n", ibuffer_if.data.rd);
        end
    end

--- a/hw/rtl/fpu/VX_tensor_dpu.sv
+++ b/hw/rtl/fpu/VX_tensor_dpu.sv
@@ -234,6 +234,9 @@ module VX_tensor_threadgroup #(
        end
    end

+    // TODO: Instead of latching each half-result and constructing a full D tile,
+    // we should be able to send the half fragments down to the commit stage
+    // immediately, saving flop space.
    assign D_frag[0][0] = D_reg[0];
    assign D_frag[0][2] = D_reg[1];
    assign D_frag[1][0] = D_reg[2];