From 6cd27faed266342c3c21a0ae5dfe68037797843a Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sat, 22 Apr 2023 23:04:47 -0700 Subject: [PATCH 1/2] Respect old req size in uncoalescer --- src/main/scala/tilelink/Coalescing.scala | 58 ++++++++++++------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 1a06db0..2484bac 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -269,22 +269,24 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule // FIXME: don't instantiate inflight table entry type here. It leaks the table's impl // detail to the coalescer val offsetBits = 4 // FIXME hardcoded - val sizeBits = 2 // FIXME hardcoded + val sizeBits = 4 // FIXME hardcoded. This should not be the TL size bits + // but the width of the size enum val newEntry = Wire( new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits) ) - println(s"=========== table sourceWidth: ${sourceWidth}") println(s"=========== table sizeBits: ${sizeBits}") - newEntry.source := coalSourceId + val coalDataWidth = tlCoal.params.dataBits + println(s"=========== coalesced data width: ${coalDataWidth}") + val origReqs = reqQueues.map(q => q.io.deq.bits) newEntry.lanes.foreach { l => l.reqs.zipWithIndex.foreach { case (r, i) => // TODO: this part needs the actual coalescing logic to work r.valid := false.B - r.source := i.U // FIXME bogus - r.offset := 1.U - r.size := 2.U // FIXME hardcoded + r.source := origReqs(i).source + r.offset := (origReqs(i).address % (coalDataWidth / 8).U) >> log2Ceil(WordSizeInBytes()) + r.size := origReqs(i).size } } newEntry.lanes(0).reqs(0).valid := true.B @@ -293,8 +295,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule newEntry.lanes(3).reqs(0).valid := true.B dontTouch(newEntry) - // Uncoalescer module sncoalesces 
responses back to each lane - val coalDataWidth = tlCoal.params.dataBits + // Uncoalescer module uncoalesces responses back to each lane val uncoalescer = Module( new UncoalescingUnit( numLanes, @@ -312,8 +313,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source uncoalescer.io.coalRespData := tlCoal.d.bits.data - println(s"=========== coalRespData width: ${tlCoal.d.bits.data.widthOption.get}") - // Queue up synthesized uncoalesced responses into each lane's response queue (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) => lanes.zipWithIndex.foreach { case (resp, i) => @@ -359,7 +358,10 @@ class UncoalescingUnit( val uncoalResps = Output( Vec( numLanes, - Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, WordSizeInBytes() * 8, sizeWidth))) + Vec( + numPerLaneReqs, + ValidIO(new RespQueueEntry(sourceWidth, WordSizeInBytes() * 8, sizeWidth)) + ) ) ) }) @@ -380,20 +382,21 @@ class UncoalescingUnit( // Un-coalescing logic // - // FIXME: `size` should be UInt, not Int - def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = { - val bitSize = byteSize * 8 - val sizeMask = (1.U << bitSize) - 1.U + def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = { + val sizeInBits = (1.U << logSize) * 8.U assert( - dataWidth > 0 && dataWidth % bitSize == 0, - s"coalesced data width ($dataWidth) not evenly divisible by core req size ($bitSize)" + (dataWidth > 0).B && (dataWidth.U % sizeInBits === 0.U), + s"coalesced data width ($dataWidth) not evenly divisible by core req size ($sizeInBits)" ) - val numChunks = dataWidth / bitSize - val chunks = Wire(Vec(numChunks, UInt(bitSize.W))) + assert(logSize === 2.U || logSize === 0.U, "TODO: currently only supporting 4-byte accesses") + val numChunks = dataWidth / 32 + val chunks = Wire(Vec(numChunks, UInt(32.W))) val offsets = (0 until numChunks) (chunks 
zip offsets).foreach { case (c, o) => - // Take [(off-1)*size:off*size] starting from MSB - c := (data >> (dataWidth - (o + 1) * bitSize)) & sizeMask + // Take [(off+1)*size-1:off*size] starting from LSB + // FIXME: whether to take the offset from MSB or LSB depends on endianness + c := data(32 * (o + 1) - 1, 32 * o) + // c := (data >> (dataWidth - (o + 1) * 32)) & sizeMask } chunks(offset) // MUX } @@ -404,18 +407,16 @@ class UncoalescingUnit( perLane.reqs.zipWithIndex.foreach { case (oldReq, i) => val ioOldReq = ioPerLane(i) - // FIXME: only looking at 0th srcId entry - + // TODO: spatial-only coalescing: only looking at 0th srcId entry ioOldReq.valid := false.B ioOldReq.bits := DontCare when(inflightTable.io.lookup.valid) { ioOldReq.valid := oldReq.valid ioOldReq.bits.source := oldReq.source - // FIXME: disregard size enum for now - val byteSize = 4 + ioOldReq.bits.size := oldReq.size ioOldReq.bits.data := - getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, byteSize) + getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, oldReq.size) } } } @@ -1145,8 +1146,9 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { coreSideLogger.module.io.respBytes ) assert( - coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps, - "FAIL: number of requests and responses to the coalescer do not match" + (coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps) && + (coreSideLogger.module.io.reqBytes === coreSideLogger.module.io.respBytes), + "FAIL: requests and responses traffic to the coalescer do not match" ) } } From 2a82e8d1199849bb9442768d4de7cf7dccd0f386 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 23 Apr 2023 00:39:59 -0700 Subject: [PATCH 2/2] Fix TL data mask stencil logic in MemTraceLogger --- src/main/scala/tilelink/Coalescing.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/scala/tilelink/Coalescing.scala 
b/src/main/scala/tilelink/Coalescing.scala index 2484bac..62a7398 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -181,6 +181,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule resp.isStore := TLUtils.DOpcodeIsStore(tlOut.d.bits.opcode) resp.size := tlOut.d.bits.size resp.data := tlOut.d.bits.data + // NOTE: D channel doesn't have mask // Queue up responses that didn't get coalesced originally ("noncoalesced" responses). // Coalesced (but uncoalesced back) responses will also be enqueued into the same queue. @@ -954,8 +955,12 @@ class MemTraceLogger( "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" ) val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) - val mask = ~((~0.U) << (trailingZerosInMask * 8.U)) + val dataW = tlIn.params.dataBits + val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U)) req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) + // when (req.valid) { + // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data) + // } when(req.valid) { TracePrintf(