Shush display
This commit is contained in:
@@ -81,6 +81,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
||||
localparam METADATA_QUEUE_DEPTH = 2 * `LATENCY_HMMA;
|
||||
|
||||
wire [1:0] step = 2'(execute_if.data.op_type);
|
||||
// op_mod is reused to indicate instruction's id in pair
|
||||
wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
|
||||
|
||||
logic [NUM_OCTETS-1:0] octet_results_valid;
|
||||
@@ -115,7 +116,6 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
||||
logic result_valid;
|
||||
logic result_ready;
|
||||
|
||||
// op_mod is reused to indicate instruction's id in pair
|
||||
VX_tensor_octet #(
|
||||
.ISW(ISW),
|
||||
.OCTET(i)
|
||||
|
||||
@@ -128,9 +128,8 @@ module VX_uop_sequencer import VX_gpu_pkg::*; (
|
||||
if (uop_sequencer_if.valid && use_uop &&
|
||||
uop_sequencer_if.data.rd == `NR_BITS'(1)) begin
|
||||
// a little sketchy? but shouldn't create any loop
|
||||
ibuffer_if.data.rd = ibuffer_if.data.rd + `NR_BITS'(8);
|
||||
ibuffer_if.data.rd = ibuffer_if.data.rd + `NR_BITS'(8); // FIXME: 8 is hardcoded
|
||||
ibuffer_if.data.rs3 = ibuffer_if.data.rs3 + `NR_BITS'(8);
|
||||
$display("yoooooooo! uop rd=%d\n", ibuffer_if.data.rd);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -234,6 +234,9 @@ module VX_tensor_threadgroup #(
|
||||
end
|
||||
end
|
||||
|
||||
// TODO: Instead of latching half-result and constructing a full D tile,
|
||||
// we should be able to send these half fragments down to commit stage
|
||||
// immediately, saving flop space
|
||||
assign D_frag[0][0] = D_reg[0];
|
||||
assign D_frag[0][2] = D_reg[1];
|
||||
assign D_frag[1][0] = D_reg[2];
|
||||
|
||||
Reference in New Issue
Block a user