Shush display
This commit is contained in:
@@ -81,6 +81,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
|||||||
localparam METADATA_QUEUE_DEPTH = 2 * `LATENCY_HMMA;
|
localparam METADATA_QUEUE_DEPTH = 2 * `LATENCY_HMMA;
|
||||||
|
|
||||||
wire [1:0] step = 2'(execute_if.data.op_type);
|
wire [1:0] step = 2'(execute_if.data.op_type);
|
||||||
|
// op_mod is reused to indicate instruction's id in pair
|
||||||
wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
|
wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
|
||||||
|
|
||||||
logic [NUM_OCTETS-1:0] octet_results_valid;
|
logic [NUM_OCTETS-1:0] octet_results_valid;
|
||||||
@@ -115,7 +116,6 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
|||||||
logic result_valid;
|
logic result_valid;
|
||||||
logic result_ready;
|
logic result_ready;
|
||||||
|
|
||||||
// op_mod is reused to indicate instruction's id in pair
|
|
||||||
VX_tensor_octet #(
|
VX_tensor_octet #(
|
||||||
.ISW(ISW),
|
.ISW(ISW),
|
||||||
.OCTET(i)
|
.OCTET(i)
|
||||||
|
|||||||
@@ -128,9 +128,8 @@ module VX_uop_sequencer import VX_gpu_pkg::*; (
|
|||||||
if (uop_sequencer_if.valid && use_uop &&
|
if (uop_sequencer_if.valid && use_uop &&
|
||||||
uop_sequencer_if.data.rd == `NR_BITS'(1)) begin
|
uop_sequencer_if.data.rd == `NR_BITS'(1)) begin
|
||||||
// a little sketchy? but shouldn't create any loop
|
// a little sketchy? but shouldn't create any loop
|
||||||
ibuffer_if.data.rd = ibuffer_if.data.rd + `NR_BITS'(8);
|
ibuffer_if.data.rd = ibuffer_if.data.rd + `NR_BITS'(8); // FIXME: 8 is hardcoded
|
||||||
ibuffer_if.data.rs3 = ibuffer_if.data.rs3 + `NR_BITS'(8);
|
ibuffer_if.data.rs3 = ibuffer_if.data.rs3 + `NR_BITS'(8);
|
||||||
$display("yoooooooo! uop rd=%d\n", ibuffer_if.data.rd);
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -234,6 +234,9 @@ module VX_tensor_threadgroup #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// TODO: Instead of latching half-result and constructing a full D tile,
|
||||||
|
// we should be able to send these half fragments down to commit stage
|
||||||
|
// immediately, saving flop space
|
||||||
assign D_frag[0][0] = D_reg[0];
|
assign D_frag[0][0] = D_reg[0];
|
||||||
assign D_frag[0][2] = D_reg[1];
|
assign D_frag[0][2] = D_reg[1];
|
||||||
assign D_frag[1][0] = D_reg[2];
|
assign D_frag[1][0] = D_reg[2];
|
||||||
|
|||||||
Reference in New Issue
Block a user