diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index ac4132c..f50c7a7 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -348,6 +348,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule resp.size := tlOut.d.bits.size resp.data := tlOut.d.bits.data resp.error := tlOut.d.bits.denied + // NOTE: D channel doesn't have mask // Queue up responses that didn't get coalesced originally ("noncoalesced" responses). // Coalesced (but uncoalesced back) responses will also be enqueued into the same queue. @@ -396,24 +397,25 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule // FIXME: don't instantiate inflight table entry type here. It leaks the table's impl // detail to the coalescer val offsetBits = 4 // FIXME hardcoded - val sizeBits = 2 // FIXME hardcoded + val sizeBits = 4 // FIXME hardcoded. This is should be not the TL size bits + // but the width of the size enum val newEntry = Wire( new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits) ) - println(s"=========== table sourceWidth: ${sourceWidth}") println(s"=========== table sizeBits: ${sizeBits}") - newEntry.source := coalescer.io.out_req.bits.source // TODO: richard to write table fill logic + assert(tlCoal.params.dataBits == (1 << CoalescerConsts.MAX_SIZE) * 8, "tlCoal parameters mismatch coalescer constant") + val origReqs = reqQueues.map(q => q.io.queue.deq.bits) newEntry.lanes.foreach { l => l.reqs.zipWithIndex.foreach { case (r, i) => // TODO: this part needs the actual coalescing logic to work r.valid := false.B - r.source := i.U // FIXME bogus - r.offset := 1.U - r.size := 2.U // FIXME hardcoded + r.source := origReqs(i).source + r.offset := (origReqs(i).address % (1 << CoalescerConsts.MAX_SIZE).U) >> CoalescerConsts.WORD_WIDTH + r.size := origReqs(i).size } } newEntry.lanes(0).reqs(0).valid := true.B @@ -422,15 +424,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule newEntry.lanes(3).reqs(0).valid := true.B dontTouch(newEntry) - // Uncoalescer module sncoalesces responses back to each lane - val coalDataWidth = tlCoal.params.dataBits + // Uncoalescer module uncoalesces responses back to each lane val uncoalescer = Module( new UncoalescingUnit( numLanes, numPerLaneReqs, sourceWidth, CoalescerConsts.WORD_WIDTH, - coalDataWidth, + (1 << CoalescerConsts.MAX_SIZE), outer.numInflightCoalRequests ) ) @@ -441,8 +442,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule uncoalescer.io.coalRespSrcId := tlCoal.d.bits.source uncoalescer.io.coalRespData := tlCoal.d.bits.data - println(s"=========== coalRespData width: ${tlCoal.d.bits.data.widthOption.get}") - // Queue up synthesized uncoalesced responses into each lane's response queue (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) => lanes.zipWithIndex.foreach { case (resp, i) => @@ -508,20 +507,21 @@ class UncoalescingUnit( // Un-coalescing logic // - // FIXME: `size` should be UInt, not Int - def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, byteSize: Int): UInt = { - val bitSize = byteSize * 8 - val sizeMask = (1.U << bitSize) - 1.U + def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = { + val sizeInBits = (1.U << logSize) * 8.U assert( - dataWidth > 0 && dataWidth % bitSize == 0, - s"coalesced data width ($dataWidth) not evenly divisible by core req size ($bitSize)" + (dataWidth > 0).B && (dataWidth.U % sizeInBits === 0.U), + s"coalesced data width ($dataWidth) not evenly divisible by core req size ($sizeInBits)" ) - val numChunks = dataWidth / bitSize - val chunks = Wire(Vec(numChunks, UInt(bitSize.W))) + assert(logSize === 2.U || logSize === 0.U, "TODO: currently only supporting 4-byte accesses") + val numChunks = dataWidth / 32 + val chunks = Wire(Vec(numChunks, UInt(32.W))) val offsets = (0 until numChunks) (chunks zip offsets).foreach { case (c, o) => - // Take [(off-1)*size:off*size] starting from MSB - c := (data >> (dataWidth - (o + 1) * bitSize)) & sizeMask + // Take [(off+1)*size-1:off*size] starting from LSB + // FIXME: whether to take the offset from MSB or LSB depends on endianness + c := data(32 * (o + 1) - 1, 32 * o) + // c := (data >> (dataWidth - (o + 1) * 32)) & sizeMask } chunks(offset) // MUX } @@ -532,18 +532,16 @@ class UncoalescingUnit( perLane.reqs.zipWithIndex.foreach { case (oldReq, i) => val ioOldReq = ioPerLane(i) - // FIXME: only looking at 0th srcId entry - + // TODO: spatial-only coalescing: only looking at 0th srcId entry ioOldReq.valid := false.B ioOldReq.bits := DontCare when(inflightTable.io.lookup.valid) { ioOldReq.valid := oldReq.valid ioOldReq.bits.source := oldReq.source - // FIXME: disregard size enum for now - val byteSize = 4 + ioOldReq.bits.size := oldReq.size ioOldReq.bits.data := - getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, byteSize) + getCoalescedDataChunk(io.coalRespData, coalDataWidth, oldReq.offset, oldReq.size) } } } @@ -991,8 +989,12 @@ class MemTraceLogger( "mask HIGH bits do not match the TL size. This should have been handled by the TL generator logic" ) val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) - val mask = ~((~0.U) << (trailingZerosInMask * 8.U)) + val dataW = tlIn.params.dataBits + val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U)) req.data := mask & (tlIn.a.bits.data >> (trailingZerosInMask * 8.U)) + // when (req.valid) { + // printf("trailingZerosInMask=%d, mask=%x, data=%x\n", trailingZerosInMask, mask, req.data) + // } when(req.valid) { TracePrintf( @@ -1183,8 +1185,9 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { coreSideLogger.module.io.respBytes ) assert( - coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps, - "FAIL: number of requests and responses to the coalescer do not match" + (coreSideLogger.module.io.numReqs === coreSideLogger.module.io.numResps) && + (coreSideLogger.module.io.reqBytes === coreSideLogger.module.io.respBytes), + "FAIL: requests and responses traffic to the coalescer do not match" ) } }