tensor: Rename substepExecute
This commit is contained in:
@@ -269,13 +269,13 @@ class TensorCoreDecoupled(
|
|||||||
|
|
||||||
require(respQueueA.bits.data.widthOption.get ==
|
require(respQueueA.bits.data.widthOption.get ==
|
||||||
io.writeback.bits.data.widthOption.get,
|
io.writeback.bits.data.widthOption.get,
|
||||||
"response data width does not match the writeback data width")
|
"response data width does not match the writeback data width")
|
||||||
|
|
||||||
val substepExecute = RegInit(0.U(1.W))
|
val substepDeqA = RegInit(0.U(1.W))
|
||||||
when (respQueueA.fire) {
|
when (respQueueA.fire) {
|
||||||
substepExecute := substepExecute + 1.U
|
substepDeqA := substepDeqA + 1.U
|
||||||
}
|
}
|
||||||
dontTouch(substepExecute)
|
dontTouch(substepDeqA)
|
||||||
|
|
||||||
// Do pipelining for the A operand so that we obtain the full 4x4 A tile
|
// Do pipelining for the A operand so that we obtain the full 4x4 A tile
|
||||||
// ready for compute. The pipeline is two-stage:
|
// ready for compute. The pipeline is two-stage:
|
||||||
@@ -292,7 +292,7 @@ class TensorCoreDecoupled(
|
|||||||
val halfAQueue = Module(new Queue(
|
val halfAQueue = Module(new Queue(
|
||||||
chiselTypeOf(respQueueA.bits), entries = 1, pipe = true
|
chiselTypeOf(respQueueA.bits), entries = 1, pipe = true
|
||||||
))
|
))
|
||||||
halfAQueue.io.enq.valid := respQueueA.valid && (substepExecute === 0.U)
|
halfAQueue.io.enq.valid := respQueueA.valid && (substepDeqA === 0.U)
|
||||||
halfAQueue.io.enq.bits := respQueueA.bits
|
halfAQueue.io.enq.bits := respQueueA.bits
|
||||||
|
|
||||||
// substep == 0 data goes to the LSB
|
// substep == 0 data goes to the LSB
|
||||||
@@ -305,9 +305,9 @@ class TensorCoreDecoupled(
|
|||||||
new TensorMemRespWithTag(dataWidth * 2), entries = 1, pipe = true
|
new TensorMemRespWithTag(dataWidth * 2), entries = 1, pipe = true
|
||||||
))
|
))
|
||||||
// hold first half A data for the first substep
|
// hold first half A data for the first substep
|
||||||
halfAQueue.io.deq.ready := respQueueA.valid && (substepExecute === 1.U) &&
|
halfAQueue.io.deq.ready := respQueueA.valid && (substepDeqA === 1.U) &&
|
||||||
fullAQueue.io.enq.ready
|
fullAQueue.io.enq.ready
|
||||||
fullAQueue.io.enq.valid := respQueueA.valid && (substepExecute === 1.U) &&
|
fullAQueue.io.enq.valid := respQueueA.valid && (substepDeqA === 1.U) &&
|
||||||
halfAQueue.io.deq.valid
|
halfAQueue.io.deq.valid
|
||||||
fullAQueue.io.enq.bits.data := fullAEnqData
|
fullAQueue.io.enq.bits.data := fullAEnqData
|
||||||
fullAQueue.io.enq.bits.tag := fullAEnqTag
|
fullAQueue.io.enq.bits.tag := fullAEnqTag
|
||||||
@@ -332,8 +332,8 @@ class TensorCoreDecoupled(
|
|||||||
// respQueueA output arbitrates to either halfAQueue or fullAQueue depending
|
// respQueueA output arbitrates to either halfAQueue or fullAQueue depending
|
||||||
// on the substep
|
// on the substep
|
||||||
respQueueA.ready := MuxCase(false.B,
|
respQueueA.ready := MuxCase(false.B,
|
||||||
Seq((substepExecute === 0.U) -> halfAQueue.io.enq.ready,
|
Seq((substepDeqA === 0.U) -> halfAQueue.io.enq.ready,
|
||||||
(substepExecute === 1.U) -> fullAQueue.io.enq.ready))
|
(substepDeqA === 1.U) -> fullAQueue.io.enq.ready))
|
||||||
// Hold B tile at respQueueB for multiple steps for reuse, only dequeue when
|
// Hold B tile at respQueueB for multiple steps for reuse, only dequeue when
|
||||||
// we fully iterated a column (M-dimension).
|
// we fully iterated a column (M-dimension).
|
||||||
val shouldDequeueBMask = ((1 << numTilesMBits) - 1).U
|
val shouldDequeueBMask = ((1 << numTilesMBits) - 1).U
|
||||||
|
|||||||
Reference in New Issue
Block a user