merge graphics

2023-04-23 00:54:24 -07:00
parent 684f732a9a 2a82e8d119
commit d28216182c
1 changed files with 32 additions and 29 deletions
--- a/src/main/scala/tilelink/Coalescing.scala
+++ b/src/main/scala/tilelink/Coalescing.scala
@@ -348,6 +348,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
      resp.size := tlOut.d.bits.size
      resp.data := tlOut.d.bits.data
      resp.error := tlOut.d.bits.denied
+      // NOTE: D channel doesn't have mask

      // Queue up responses that didn't get coalesced originally ("noncoalesced" responses).
      // Coalesced (but uncoalesced back) responses will also be enqueued into the same queue.
@@ -396,24 +397,25 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
  // FIXME: don't instantiate inflight table entry type here.  It leaks the table's impl
  // detail to the coalescer
  val offsetBits = 4 // FIXME hardcoded
-  val sizeBits = 2 // FIXME hardcoded
+  val sizeBits = 4 // FIXME hardcoded.  This should be the width of the size enum,
+  // not the TL size bits
  val newEntry = Wire(
    new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
  )
-
  println(s"=========== table sourceWidth: ${sourceWidth}")
  println(s"=========== table sizeBits: ${sizeBits}")
-
  newEntry.source := coalescer.io.out_req.bits.source

  // TODO: richard to write table fill logic
+  assert(tlCoal.params.dataBits == (1 << CoalescerConsts.MAX_SIZE) * 8, "tlCoal parameters mismatch coalescer constant")
+  val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
  newEntry.lanes.foreach { l =>
    l.reqs.zipWithIndex.foreach { case (r, i) =>
      // TODO: this part needs the actual coalescing logic to work
      r.valid := false.B
-      r.source := i.U // FIXME bogus
-      r.offset := 1.U
-      r.size := 2.U // FIXME hardcoded
+      r.source := origReqs(i).source
+      r.offset := (origReqs(i).address % (1 << CoalescerConsts.MAX_SIZE).U) >> CoalescerConsts.WORD_WIDTH
+      r.size := origReqs(i).size
    }
  }
  newEntry.lanes(0).reqs(0).valid := true.B
@@ -422,15 +424,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
  newEntry.lanes(3).reqs(0).valid := true.B
  dontTouch(newEntry)

-  // Uncoalescer module sncoalesces responses back to each lane
-  val coalDataWidth = tlCoal.params.dataBits
+  // Uncoalescer module uncoalesces responses back to each lane
  val uncoalescer = Module(
    new UncoalescingUnit(
      numLanes,
      numPerLaneReqs,
      sourceWidth,
      CoalescerConsts.WORD_WIDTH,
-      coalDataWidth,
+      (1 << CoalescerConsts.MAX_SIZE),
      outer.numInflightCoalRequests
    )
  )
@@ -441,8 +442,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
  uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source
  uncoalescer.io.coalRespData := tlCoal.d.bits.data

-  println(s"=========== coalRespData width: ${tlCoal.d.bits.data.widthOption.get}")
-
  // Queue up synthesized uncoalesced responses into each lane's response queue
  (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
    lanes.zipWithIndex.foreach { case (resp, i) =>
@@ -508,20 +507,21 @@ class UncoalescingUnit(

  // Un-coalescing logic
  //
-  // FIXME: `size` should be UInt, not Int
-  def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = {
-    val bitSize = byteSize * 8
-    val sizeMask = (1.U << bitSize) - 1.U
+  def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
+    val sizeInBits = (1.U << logSize) * 8.U
    assert(
-      dataWidth > 0 && dataWidth % bitSize == 0,
-      s"coalesced data width ($dataWidth) not evenly divisible by core req size ($bitSize)"
+      (dataWidth > 0).B && (dataWidth.U % sizeInBits === 0.U),
+      s"coalesced data width ($dataWidth) not evenly divisible by core req size ($sizeInBits)"
    )
-    val numChunks = dataWidth / bitSize
-    val chunks = Wire(Vec(numChunks, UInt(bitSize.W)))
+    assert(logSize === 2.U || logSize === 0.U, "TODO: currently only supporting 4-byte accesses")
+    val numChunks = dataWidth / 32
+    val chunks = Wire(Vec(numChunks, UInt(32.W)))
    val offsets = (0 until numChunks)
    (chunks zip offsets).foreach { case (c, o) =>
-      // Take [(off-1)*size:off*size] starting from MSB
-      c := (data >> (dataWidth - (o + 1) * bitSize)) & sizeMask
+      // Take [(off+1)*size-1:off*size] starting from LSB
+      // FIXME: whether to take the offset from MSB or LSB depends on endianness
+      c := data(32 * (o + 1) - 1, 32 * o)
+    // c := (data >> (dataWidth - (o + 1) * 32)) & sizeMask
    }
    chunks(offset) // MUX
  }
@@ -532,18 +532,16 @@ class UncoalescingUnit(
    perLane.reqs.zipWithIndex.foreach { case (oldReq, i) =>
      val ioOldReq = ioPerLane(i)

-      // FIXME: only looking at 0th srcId entry
-
+      // TODO: spatial-only coalescing: only looking at 0th srcId entry
      ioOldReq.valid := false.B
      ioOldReq.bits := DontCare

      when(inflightTable.io.lookup.valid) {
        ioOldReq.valid := oldReq.valid
        ioOldReq.bits.source := oldReq.source
-        // FIXME: disregard size enum for now
-        val byteSize = 4
+        ioOldReq.bits.size := oldReq.size
        ioOldReq.bits.data :=
-          getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, byteSize)
+          getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, oldReq.size)
      }
    }
  }
@@ -991,8 +989,12 @@ class MemTraceLogger(
          "mask HIGH bits do not match the TL size.  This should have been handled by the TL generator logic"
        )
        val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
-        val mask = ~((~0.U) << (trailingZerosInMask * 8.U))
+        val dataW = tlIn.params.dataBits
+        val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U))
        req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U))
+        // when (req.valid) {
+        //   printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data)
+        // }

        when(req.valid) {
          TracePrintf(
@@ -1183,8 +1185,9 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
        coreSideLogger.module.io.respBytes
      )
      assert(
-        coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps,
-        "FAIL: number of requests and responses to the coalescer do not match"
+        (coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps) &&
+          (coreSideLogger.module.io.reqBytes === coreSideLogger.module.io.respBytes),
+        "FAIL: requests and responses traffic to the coalescer do not match"
      )
    }
  }