Properly handle upstream and downstream backpressure for respQueues

This commit is contained in:
Hansung Kim
2023-10-24 11:36:18 -07:00
parent 8e0904a1ad
commit e9c206dfa2
2 changed files with 23 additions and 21 deletions

View File

@@ -216,7 +216,7 @@ class VortexTile private (
// Conditionally instantiate memory coalescer
val coalescerNode = p(CoalescerKey) match {
case Some(coalescerParam) => {
val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = false)))
val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = true)))
coal.cpuNode :=* dmemAggregateNode
coal.aggregateNode // N+1 lanes
}
@@ -407,11 +407,10 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
val matchingSources = Wire(UInt(outer.numLanes.W))
matchingSources := dmemTLBundles
.map(b =>
// If there is no valid response across all lanes, matchingSources
// should always be 1, or otherwise downstream would think upstream
// is blocked and re-try sending
!arb.io.out.valid
|| (b.d.bits.source === arb.io.out.bits))
// If there is no valid response pending across all lanes,
// matchingSources should not filter out upstream ready signals, so
// set it to all-1
!arb.io.out.valid || (b.d.bits.source === arb.io.out.bits))
.asUInt
// connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes

View File

@@ -1022,11 +1022,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
)
respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid
respQueue.io.enq(respQueueNoncoalPort).bits := resp
// TODO: deq.ready should respect upstream ready
respQueue.io.deq(respQueueNoncoalPort).ready := true.B
assert(respQueue.io.deq.length == 1,
"respQueue should have only one dequeue port to the upstream")
respQueue.io.deq.head.ready := tlIn.d.ready
tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
tlIn.d.bits := respQueue.io.deq(respQueueNoncoalPort).bits.toTLD(edgeIn)
tlIn.d.valid := respQueue.io.deq.head.valid
tlIn.d.bits := respQueue.io.deq.head.bits.toTLD(edgeIn)
// Stall downstream when respQueue is full of entries waiting to enter core
tlOut.d.ready := respQueue.io.enq(respQueueNoncoalPort).ready
// Debug only
val inflightCounter = RegInit(UInt(32.W), 0.U)
@@ -1069,7 +1072,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
}
}
}
// uncoalescer backpressure
// Uncoalescer backpressure: stall downstream when uncoalescer is not ready
// to accept new coalesced response
tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Debug
@@ -1125,20 +1129,20 @@ class Uncoalescer(
// Un-coalesce responses back to individual lanes
// Connect uncoalesced results back into each lane's response queue
val foundRow = io.inflightLookup.bits
(foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, ioEnqs), lane) =>
(foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, enqIOs), lane) =>
foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) =>
val ioEnq = ioEnqs(depth)
val enqIO = enqIOs(depth)
// TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
// cache. This should ideally not happen though (and hasn't happened yet
// in testing.)
assert(
ioEnq.ready,
enqIO.ready,
s"respQueue: enq port for ${depth}-th uncoalesced response is blocked for lane ${lane}"
)
// spatial-only coalescing: only looking at 0th srcId entry
ioEnq.valid := false.B
ioEnq.bits := DontCare
enqIO.valid := false.B
enqIO.bits := DontCare
// debug
// when (resp.valid) {
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
@@ -1146,11 +1150,11 @@ class Uncoalescer(
// dontTouch(q.io.enq(respQueueCoalPortOffset))
when(io.inflightLookup.valid && foundReq.valid) {
ioEnq.valid := io.coalResp.valid && foundReq.valid
ioEnq.bits.source := foundReq.source
enqIO.valid := io.coalResp.valid && foundReq.valid
enqIO.bits.source := foundReq.source
val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum)
ioEnq.bits.size := logSize
ioEnq.bits.data :=
enqIO.bits.size := logSize
enqIO.bits.data :=
getCoalescedDataChunk(
io.coalResp.bits.data,
io.coalResp.bits.data.getWidth,
@@ -1351,7 +1355,6 @@ object TLUtils {
Mux(opcode === TLMessages.PutFullData, true.B, false.B)
}
def DOpcodeIsStore(opcode: UInt, checkOpcode: Bool): Bool = {
printf("DOpcodeIsStore: opcode=%d\n", opcode)
when(checkOpcode) {
assert(
opcode === TLMessages.AccessAck || opcode === TLMessages.AccessAckData,