tensor: Handle wid queue backpressure in dpu
This commit is contained in:
@@ -77,7 +77,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
||||
// octet. E.g. two tgs map lane 0-3 and lane 16-19 -> 16
|
||||
// FIXME: not sure this is the right logic; just filling in what works for now
|
||||
localparam LANE_OFFSET_THREADGROUP = (4 * NUM_OCTETS);
|
||||
localparam REQ_QUEUE_DEPTH = 4;
|
||||
localparam METADATA_QUEUE_DEPTH = 4;
|
||||
|
||||
wire [1:0] step = 2'(execute_if.data.op_type);
|
||||
wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
|
||||
@@ -220,7 +220,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW(DATAW),
|
||||
.DEPTH(REQ_QUEUE_DEPTH)
|
||||
.DEPTH(METADATA_QUEUE_DEPTH)
|
||||
) pending_uops (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
||||
@@ -93,7 +93,7 @@ module VX_tensor_dpu #(
|
||||
wire enq = valid_in && ready_in;
|
||||
wire deq = valid_out && ready_out;
|
||||
|
||||
assign ready_in = &(threadgroup_readys);
|
||||
assign ready_in = &(threadgroup_readys) && !full;
|
||||
assign valid_out = &(threadgroup_valids);
|
||||
|
||||
// need to pass warp IDs along to support multithreading
|
||||
@@ -109,13 +109,11 @@ module VX_tensor_dpu #(
|
||||
.data_out (D_wid),
|
||||
.empty (empty),
|
||||
`UNUSED_PIN(alm_empty),
|
||||
.full (full), // should be impossible to overflow
|
||||
.full (full),
|
||||
`UNUSED_PIN(alm_full),
|
||||
`UNUSED_PIN(size)
|
||||
);
|
||||
|
||||
`RUNTIME_ASSERT(reset || !full, ("dpu wid queue is full!"))
|
||||
|
||||
// `RUNTIME_ASSERT(reset || (&(threadgroup_valids) == valid_out),
|
||||
// ("FEDP and metadata queue went out of sync!"))
|
||||
endmodule
|
||||
|
||||
Reference in New Issue
Block a user