Properly handle upstream and downstream backpressure for respQueues

2023-10-24 11:36:18 -07:00
parent 8e0904a1ad
commit e9c206dfa2
2 changed files with 23 additions and 21 deletions
--- a/src/main/scala/tile/VortexTile.scala
+++ b/src/main/scala/tile/VortexTile.scala
@@ -216,7 +216,7 @@ class VortexTile private (
  // Conditionally instantiate memory coalescer
  val coalescerNode = p(CoalescerKey) match {
    case Some(coalescerParam) => {
-      val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = false)))
+      val coal = LazyModule(new CoalescingUnit(coalescerParam.copy(enable = true)))
      coal.cpuNode :=* dmemAggregateNode
      coal.aggregateNode // N+1 lanes
    }
@@ -407,11 +407,10 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
    val matchingSources = Wire(UInt(outer.numLanes.W))
    matchingSources := dmemTLBundles
      .map(b =>
-          // If there is no valid response across all lanes, matchingSources
-          // should always be 1, or otherwise downstream would think upstream
-          // is blocked and re-try sending
-          !arb.io.out.valid
-          || (b.d.bits.source === arb.io.out.bits))
+          // If there is no valid response pending across all lanes,
+          // matchingSources should not filter out upstream ready signals, so
+          // set it to all-1
+          !arb.io.out.valid || (b.d.bits.source === arb.io.out.bits))
      .asUInt

    // connection: VortexBundle <--> VortexTLAdapter <--> dmemNodes
--- a/src/main/scala/tilelink/Coalescing.scala
+++ b/src/main/scala/tilelink/Coalescing.scala
@@ -1022,11 +1022,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
      )
      respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid
      respQueue.io.enq(respQueueNoncoalPort).bits := resp
-      // TODO: deq.ready should respect upstream ready
-      respQueue.io.deq(respQueueNoncoalPort).ready := true.B
+      assert(respQueue.io.deq.length == 1,
+        "respQueue should have only one dequeue port to the upstream")
+      respQueue.io.deq.head.ready := tlIn.d.ready

-      tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
-      tlIn.d.bits := respQueue.io.deq(respQueueNoncoalPort).bits.toTLD(edgeIn)
+      tlIn.d.valid := respQueue.io.deq.head.valid
+      tlIn.d.bits := respQueue.io.deq.head.bits.toTLD(edgeIn)
+      // Stall downstream when respQueue is full of entries waiting to enter core
+      tlOut.d.ready := respQueue.io.enq(respQueueNoncoalPort).ready

      // Debug only
      val inflightCounter = RegInit(UInt(32.W), 0.U)
@@ -1069,7 +1072,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
      }
    }
  }
-  // uncoalescer backpressure
+  // Uncoalescer backpressure: stall downstream when uncoalescer is not ready
+  // to accept new coalesced response
  tlCoal.d.ready := uncoalescer.io.coalResp.ready

  // Debug
@@ -1125,20 +1129,20 @@ class Uncoalescer(
  // Un-coalesce responses back to individual lanes
  // Connect uncoalesced results back into each lane's response queue
  val foundRow = io.inflightLookup.bits
-  (foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, ioEnqs), lane) =>
+  (foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, enqIOs), lane) =>
    foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) =>
-      val ioEnq = ioEnqs(depth)
+      val enqIO = enqIOs(depth)

      // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
      // cache.  This should ideally not happen though (and hasn't happened yet
      // in testing.)
      assert(
-        ioEnq.ready,
+        enqIO.ready,
        s"respQueue: enq port for ${depth}-th uncoalesced response is blocked for lane ${lane}"
      )
      // spatial-only coalescing: only looking at 0th srcId entry
-      ioEnq.valid := false.B
-      ioEnq.bits := DontCare
+      enqIO.valid := false.B
+      enqIO.bits := DontCare
      // debug
      // when (resp.valid) {
      //   printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
@@ -1146,11 +1150,11 @@ class Uncoalescer(
      // dontTouch(q.io.enq(respQueueCoalPortOffset))

      when(io.inflightLookup.valid && foundReq.valid) {
-        ioEnq.valid := io.coalResp.valid && foundReq.valid
-        ioEnq.bits.source := foundReq.source
+        enqIO.valid := io.coalResp.valid && foundReq.valid
+        enqIO.bits.source := foundReq.source
        val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum)
-        ioEnq.bits.size := logSize
-        ioEnq.bits.data :=
+        enqIO.bits.size := logSize
+        enqIO.bits.data :=
          getCoalescedDataChunk(
            io.coalResp.bits.data,
            io.coalResp.bits.data.getWidth,
@@ -1351,7 +1355,6 @@ object TLUtils {
    Mux(opcode === TLMessages.PutFullData, true.B, false.B)
  }
  def DOpcodeIsStore(opcode: UInt, checkOpcode: Bool): Bool = {
-    printf("DOpcodeIsStore: opcode=%d\n", opcode)
    when(checkOpcode) {
      assert(
        opcode === TLMessages.AccessAck || opcode === TLMessages.AccessAckData,