From e9c206dfa25da1b282aa96a5ea19aa9d8291df6b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 24 Oct 2023 11:36:18 -0700 Subject: [PATCH] Properly handle upstream and downstream backpressure for respQueues --- src/main/scala/tile/VortexTile.scala | 11 ++++---- src/main/scala/tilelink/Coalescing.scala | 33 +++++++++++++----------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index e5ed0c4..4dc3cd9 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -216,7 +216,7 @@ class VortexTile private ( // Conditionally instantiate memory coalescer val coalescerNode = p(CoalescerKey) match { case Some(coalescerParam) => { - val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = false))) + val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = true))) coal.cpuNode :=* dmemAggregateNode coal.aggregateNode // N+1 lanes } @@ -407,11 +407,10 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { val matchingSources = Wire(UInt(outer.numLanes.W)) matchingSources := dmemTLBundles .map(b => - // If there is no valid response across all lanes, matchingSources - // should always be 1, or otherwise downstream would think upstream - // is blocked and re-try sending - !arb.io.out.valid - || (b.d.bits.source === arb.io.out.bits)) + // If there is no valid response pending across all lanes, + // matchingSources should not filter out upstream ready signals, so + // set it to all-1 + !arb.io.out.valid || (b.d.bits.source === arb.io.out.bits)) .asUInt // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 10407ea..9885a16 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -1022,11 +1022,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) ) respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid respQueue.io.enq(respQueueNoncoalPort).bits := resp - // TODO: deq.ready should respect upstream ready - respQueue.io.deq(respQueueNoncoalPort).ready := true.B + assert(respQueue.io.deq.length == 1, + "respQueue should have only one dequeue port to the upstream") + respQueue.io.deq.head.ready := tlIn.d.ready - tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid - tlIn.d.bits := respQueue.io.deq(respQueueNoncoalPort).bits.toTLD(edgeIn) + tlIn.d.valid := respQueue.io.deq.head.valid + tlIn.d.bits := respQueue.io.deq.head.bits.toTLD(edgeIn) + // Stall downstream when respQueue is full of entries waiting to enter core + tlOut.d.ready := respQueue.io.enq(respQueueNoncoalPort).ready // Debug only val inflightCounter = RegInit(UInt(32.W), 0.U) @@ -1069,7 +1072,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) } } } - // uncoalescer backpressure + // Uncoalescer backpressure: stall downstream when uncoalescer is not ready + // to accept new coalesced response tlCoal.d.ready := uncoalescer.io.coalResp.ready // Debug @@ -1125,20 +1129,20 @@ class Uncoalescer( // Un-coalesce responses back to individual lanes // Connect uncoalesced results back into each lane's response queue val foundRow = io.inflightLookup.bits - (foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, ioEnqs), lane) => + (foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, enqIOs), lane) => foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) => - val ioEnq = ioEnqs(depth) + val enqIO = enqIOs(depth) // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream // cache. This should ideally not happen though (and hasn't happened yet // in testing.) assert( - ioEnq.ready, + enqIO.ready, s"respQueue: enq port for ${depth}-th uncoalesced response is blocked for lane ${lane}" ) // spatial-only coalescing: only looking at 0th srcId entry - ioEnq.valid := false.B - ioEnq.bits := DontCare + enqIO.valid := false.B + enqIO.bits := DontCare // debug // when (resp.valid) { // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") @@ -1146,11 +1150,11 @@ class Uncoalescer( // dontTouch(q.io.enq(respQueueCoalPortOffset)) when(io.inflightLookup.valid && foundReq.valid) { - ioEnq.valid := io.coalResp.valid && foundReq.valid - ioEnq.bits.source := foundReq.source + enqIO.valid := io.coalResp.valid && foundReq.valid + enqIO.bits.source := foundReq.source val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum) - ioEnq.bits.size := logSize - ioEnq.bits.data := + enqIO.bits.size := logSize + enqIO.bits.data := getCoalescedDataChunk( io.coalResp.bits.data, io.coalResp.bits.data.getWidth, @@ -1351,7 +1355,6 @@ object TLUtils { Mux(opcode === TLMessages.PutFullData, true.B, false.B) } def DOpcodeIsStore(opcode: UInt, checkOpcode: Bool): Bool = { - printf("DOpcodeIsStore: opcode=%d\n", opcode) when(checkOpcode) { assert( opcode === TLMessages.AccessAck || opcode === TLMessages.AccessAckData,