diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index da79370..61774c7 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -92,7 +92,7 @@ object defaultConfig extends CoalescerConfig( // watermark = 2, wordSizeInBytes = 4, // when attaching to SoC, 16 source IDs are not enough due to longer latency - numOldSrcIds = 16, + numOldSrcIds = 8, numNewSrcIds = 8, respQueueDepth = 8, coalLogSizes = Seq(3), @@ -947,48 +947,33 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) dontTouch(tlOut.d) } - // connect coalesced request that is newly generated and being recorded in - // the uncoalescer + // Uncoalescer input + // + // connect coalesced request to be recorded in the uncoalescer table uncoalescer.io.coalReq <> coalReq - // We can't simply use coalescer.io.coalReq.valid here. - // coalescer.io.coalReq.valid tells us when there exists a valid coalescing - // combination, but not when we can actually fire that to downstream, because - // we can still be blocked by source ID clashes due to backpressure. - // So, we have to overwrite just the valid bit with the final valid that - // indicates when we can send this request out. - // NOTE(hansung): this feels slightly awkward. Maybe doing sourcegen inside - // the coalescer so that it gives the final call is better, but that may be - // too much IO for the coalescer. uncoalescer.io.invalidate := coalescer.io.invalidate uncoalescer.io.windowElts := reqQueues.io.elts - // connect coalesced response going into the uncoalescer, ready to be - // uncoalesced - // Cleanup: custom <>? + // coalesced response to be used to look up the uncoalescer table uncoalescer.io.coalResp.valid := tlCoal.d.valid uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits) + + // Uncoalescer output + // + // Connect uncoalescer results back into response queue + (respQueues zip uncoalescer.io.respQueueIO).foreach { case (q, uncoalEnqs) => + require(q.io.enq.length == config.queueDepth + respQueueUncoalPortOffset, + s"wrong number of enq ports for MultiPort response queue") + // slice the ports reserved for uncoalesced response + val qUncoalEnqs = q.io.enq.slice(respQueueUncoalPortOffset, q.io.enq.length) + (qUncoalEnqs zip uncoalEnqs).foreach { + case (enq, uncoalEnq) => { + enq <> uncoalEnq + } + } + } // uncoalescer backpressure tlCoal.d.ready := uncoalescer.io.coalResp.ready - // Connect uncoalescer results back into each lane's response queue - (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { - case ((q, perLaneResps), lane) => - perLaneResps.zipWithIndex.foreach { case (resp, i) => - // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream - // cache. This should ideally not happen though. - assert( - q.io.enq(respQueueUncoalPortOffset + i).ready, - s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}" - ) - q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid - q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits - // debug - // when (resp.valid) { - // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") - // } - // dontTouch(q.io.enq(respQueueCoalPortOffset)) - } - } - // Debug dontTouch(coalescer.io.coalReq) val coalRespData = tlCoal.d.bits.data @@ -1017,11 +1002,8 @@ class Uncoalescer( // TODO: duplicate type construction val windowElts = Input(Vec(config.numLanes, Vec(config.queueDepth, nonCoalReqT))) val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) - val uncoalResps = Output( - Vec( - config.numLanes, - Vec(config.queueDepth, ValidIO(new NonCoalescedResponse(config))) - ) + val respQueueIO = Vec(config.numLanes, + Vec(config.queueDepth, Decoupled(new NonCoalescedResponse(config))) ) }) @@ -1097,25 +1079,37 @@ class Uncoalescer( } // Un-coalesce responses back to individual lanes - val found = inflightTable.io.lookup.bits - (found.lanes zip io.uncoalResps).foreach { case (perLane, ioPerLane) => - perLane.reqs.zipWithIndex.foreach { case (oldReq, depth) => - val ioOldReq = ioPerLane(depth) + // Connect uncoalesced results back into each lane's response queue + val foundRow = inflightTable.io.lookup.bits + (foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, ioEnqs), lane) => + foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) => + val ioEnq = ioEnqs(depth) + // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream + // cache. This should ideally not happen though. + assert( + ioEnq.ready, + s"respQueue: enq port for ${depth}-th uncoalesced response is blocked for lane ${lane}" + ) // TODO: spatial-only coalescing: only looking at 0th srcId entry - ioOldReq.valid := false.B - ioOldReq.bits := DontCare + ioEnq.valid := false.B + ioEnq.bits := DontCare + // debug + // when (resp.valid) { + // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") + // } + // dontTouch(q.io.enq(respQueueCoalPortOffset)) - when(inflightTable.io.lookup.valid && oldReq.valid) { - ioOldReq.valid := oldReq.valid - ioOldReq.bits.source := oldReq.source - val logSize = found.sizeEnumT.enumToLogSize(oldReq.sizeEnum) - ioOldReq.bits.size := logSize - ioOldReq.bits.data := + when(inflightTable.io.lookup.valid && foundReq.valid) { + ioEnq.valid := foundReq.valid + ioEnq.bits.source := foundReq.source + val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum) + ioEnq.bits.size := logSize + ioEnq.bits.data := getCoalescedDataChunk( io.coalResp.bits.data, io.coalResp.bits.data.getWidth, - oldReq.offset, + foundReq.offset, logSize ) }