tensor: Enqueue both insts in pair to issue queue
Otherwise, the first-in-pair instructions can run ahead and latch their inputs for the next pair before the second-in-pair instructions have finished computing on the current one. This might introduce more frontend stalls; more experimentation is needed.
This commit is contained in:
@@ -95,7 +95,7 @@ module VX_tensor_dpu #(
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + $bits(wid)/* + $bits(D_tile)*/),
|
||||
// .DEPTH (`LATENCY_HMMA),
|
||||
.DEPTH (2),
|
||||
.DEPTH (4),
|
||||
.RESETW (1)
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
|
||||
Reference in New Issue
Block a user