From 0df319288275f3b683ff2301c921c1e84a0a225c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 17:31:51 -0700 Subject: [PATCH] Revamp uncoalescer IO Connect coalescer output directly to the uncoalescer at the toplevel, and do table entry construction entirely inside the module. WIP: unittest is very broken as a result of this. --- src/main/scala/tilelink/Coalescing.scala | 196 ++++++++++++----------- 1 file changed, 105 insertions(+), 91 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 98eb668..e7e69f8 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -41,6 +41,13 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum { } } +// Mapping to reference model param names +// numLanes: Int, <-> config.NUM_LANES +// numPerLaneReqs: Int, <-> config.DEPTH +// sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS) +// sizeWidth: Int, <-> config.sizeEnum.width +// coalDataWidth: Int, <-> (1 << config.MAX_SIZE) +// numInflightCoalRequests: Int <-> config.NUM_NEW_IDS case class CoalescerConfig( enable: Boolean, // globally enable or disable coalescing numLanes: Int, // number of lanes (or threads) in a warp @@ -137,7 +144,8 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La // Protocol-agnostic bundles that represent a request and a response to the // coalescer. -class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle { +class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) + extends Bundle { require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") val op = UInt(1.W) // 0=READ 1=WRITE val address = UInt(addressWidth.W) @@ -181,6 +189,7 @@ case class CoalescedRequest(config: CoalescerConfig) class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { + require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") val op = UInt(1.W) // 0=READ 1=WRITE val size = UInt(sizeWidth.W) val source = UInt(sourceWidth.W) @@ -382,10 +391,10 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) class MonoCoalescer( config: CoalescerConfig, coalLogSize: Int, - windowT: CoalShiftQueue[NonCoalescedRequest] + queueT: CoalShiftQueue[NonCoalescedRequest] ) extends Module { val io = IO(new Bundle { - val window = Input(windowT.io.cloneType) + val window = Input(queueT.io.cloneType) val results = Output(new Bundle { val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val baseAddr = Output(UInt(config.addressWidth.W)) @@ -478,7 +487,8 @@ class MonoCoalescer( val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts) val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux - // matchTable for the chosen lane, but converted to a Vec[UInt] + // matchTable for the chosen lane, but each column converted to bitflags, + // i.e. Vec[UInt] val chosenMatches = VecInit(matchTablePerLane.map { table => VecInit(table.map(VecInit(_).asUInt)) })(chosenLeaderIdx) @@ -532,23 +542,25 @@ class MonoCoalescer( // Software model: coalescer.py class MultiCoalescer( config: CoalescerConfig, - windowT: CoalShiftQueue[NonCoalescedRequest], + queueT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, ) extends Module { + val invalidateT = Valid(Vec(config.numLanes, UInt(config.queueDepth.W))) val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues - val window = Input(windowT.io.cloneType) + val window = Input(queueT.io.cloneType) // generated coalesced request val coalReq = DecoupledIO(coalReqT.cloneType) - // invalidate signals going into each request queue's head - val invalidate = - Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) - // whether a lane is coalescable + // invalidate signals going into each request queue's head. Lanes with + // high invalidate bits are what became coalesced into the new request. + val invalidate = Output(invalidateT) + // whether a lane is coalescable. This is used to output non-coalescable + // lanes to the arbiter so they can be flushed to downstream. val coalescable = Output(Vec(config.numLanes, Bool())) }) val coalescers = config.coalLogSizes.map(size => - Module(new MonoCoalescer(config, size, windowT)) + Module(new MonoCoalescer(config, size, queueT)) ) coalescers.foreach(_.io.window := io.window) @@ -701,11 +713,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + s"mismatch with config.addressWidth (${config.addressWidth})" ) + require( + config.maxCoalLogSize <= config.dataBusWidth, + "multi-beat coalesced reads/writes are currently not supported" + ) val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits - val reqQueueEntryT = new NonCoalescedRequest(config) + val nonCoalReqT = new NonCoalescedRequest(config) val reqQueues = Module( - new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config) + new CoalShiftQueue(nonCoalReqT, config.queueDepth, config) ) val coalReqT = new CoalescedRequest(config) @@ -725,7 +741,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) (outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach { case (((tlIn, _), (tlOut, edgeOut)), lane) => // Request queue - val req = Wire(reqQueueEntryT) + val req = Wire(nonCoalReqT) req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode) req.source := tlIn.a.bits.source @@ -782,6 +798,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. tlCoal.e.valid := false.B + require( + tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, + s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" + + s" (${(1 << config.dataBusWidth) * 8})" + ) + // =========================================================================== // Response flow // =========================================================================== @@ -870,57 +892,21 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) dontTouch(tlOut.d) } - // Construct new entry for the inflight table - // FIXME: don't instantiate inflight table entry type here. It leaks the table's impl - // detail to the coalescer - - // richard: I think a good idea is to pass Valid[ReqQueueEntry] generated by - // the coalescer directly into the uncoalescer, so that we can offload the - // logic to generate the Inflight Entry into the uncoalescer, where it should be. - // this also reduces top level clutter. - - val uncoalescer = Module(new Uncoalescer(config)) - - val newEntry = Wire(uncoalescer.inflightTable.entryT) - newEntry.source := coalescer.io.coalReq.bits.source - - assert( - config.maxCoalLogSize <= config.dataBusWidth, - "multi-beat coalesced reads/writes are currently not supported" - ) - assert( - tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, - s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" - + s" (${(1 << config.dataBusWidth) * 8})" + val uncoalescer = Module( + new Uncoalescer(config, reqQueues, coalReqT, coalescer.invalidateT) ) + // connect coalesced request that is newly generated and being recorded in + // the uncoalescer + uncoalescer.io.coalReq <> coalescer.io.coalReq + uncoalescer.io.invalidate := coalescer.io.invalidate val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) - // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the - // coalescer to every (numLanes * queueDepth) entry in the inflight table. - (newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex - .foreach { case ((laneEntry, laneInv), lane) => - (laneEntry.reqs zip laneInv.asBools).zipWithIndex - .foreach { case ((reqEntry, inv), i) => - val req = reqQueues.io.elts(lane)(i) - when((coalescer.io.invalidate.valid && inv)) { - printf( - s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", - req.source - ) - } - reqEntry.valid := (coalescer.io.invalidate.valid && inv) - reqEntry.source := req.source - reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth) - reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size) - // TODO: load/store op - } - } - dontTouch(newEntry) - - uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid - uncoalescer.io.newEntry := newEntry + uncoalescer.io.window := reqQueues.io + // connect coalesced response going into the uncoalescer, ready to be + // uncoalesced // Cleanup: custom <>? uncoalescer.io.coalResp.valid := tlCoal.d.valid uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits) + // uncoalescer backpressure tlCoal.d.ready := uncoalescer.io.coalResp.ready // Connect uncoalescer results back into each lane's response queue @@ -935,11 +921,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) ) q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits - // debug - // when (resp.valid) { - // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") - // } - // dontTouch(q.io.enq(respQueueCoalPortOffset)) + // debug + // when (resp.valid) { + // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") + // } + // dontTouch(q.io.enq(respQueueCoalPortOffset)) } } @@ -952,48 +938,76 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) dontTouch(tlCoal.d) } -class Uncoalescer(config: CoalescerConfig) extends Module { - // Mapping to reference model param names - // val numLanes: Int, <-> config.NUM_LANES - // val numPerLaneReqs: Int, <-> config.DEPTH - // val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS) - // val sizeWidth: Int, <-> config.sizeEnum.width - // val coalDataWidth: Int, <-> (1 << config.MAX_SIZE) - // val numInflightCoalRequests: Int <-> config.NUM_NEW_IDS +class Uncoalescer( + config: CoalescerConfig, + queueT: CoalShiftQueue[NonCoalescedRequest], + coalReqT: Request, + coalInvalidateT: Valid[Vec[UInt]], +) extends Module { val inflightTable = Module(new InflightCoalReqTable(config)) val io = IO(new Bundle { - val coalReqValid = Input(Bool()) - // FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer - val newEntry = Input(inflightTable.entryT.cloneType) + // generated coalesced request, connected to the output of the coalescer. + val coalReq = Flipped(DecoupledIO(coalReqT.cloneType)) + // invalidate signal coming out of coalescer. + val invalidate = Input(coalInvalidateT.cloneType) + // coalescing window, connected to the contents of the request queues. + // Uncoalescer looks at the queue entries that got coalesced into `coalReq` + // in order to record which lanes this coalReq originally came from. + val window = Input(queueT.io.cloneType) val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val uncoalResps = Output( Vec( config.numLanes, - Vec( - config.queueDepth, - ValidIO( - new NonCoalescedResponse(config) - ) - ) + Vec(config.queueDepth, ValidIO(new NonCoalescedResponse(config))) ) ) }) - // Populate inflight table - inflightTable.io.enq.valid := io.coalReqValid - inflightTable.io.enq.bits := io.newEntry + // Uncoalescer has to be always ready to accept and record new coalesced + // requests, so that it doesn't stall the coalescer. + io.coalReq.ready := true.B + + // Construct a new entry for the inflight table using generated coalesced request + def generateInflightTableEntry: InflightCoalReqTableEntry = { + val newEntry = Wire(inflightTable.entryT) + newEntry.source := io.coalReq.bits.source + // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the + // coalescer to every (numLanes * queueDepth) entry in the inflight table. + (newEntry.lanes zip io.invalidate.bits).zipWithIndex + .foreach { case ((laneEntry, laneInv), lane) => + (laneEntry.reqs zip laneInv.asBools).zipWithIndex + .foreach { case ((reqEntry, inv), i) => + val req = io.window.elts(lane)(i) + when((io.invalidate.valid && inv)) { + printf( + s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", + req.source + ) + } + reqEntry.valid := (io.invalidate.valid && inv) + reqEntry.source := req.source + reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth) + reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size) + // TODO: load/store op + } + } + assert( + !((io.coalReq.valid === true.B) && (io.coalResp.valid === true.B) && + (newEntry.source === io.coalResp.bits.source)), + "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled" + ) + dontTouch(newEntry) + + newEntry + } + inflightTable.io.enq.valid := io.coalReq.valid + inflightTable.io.enq.bits := generateInflightTableEntry // Look up the table with incoming coalesced responses inflightTable.io.lookup.ready := io.coalResp.valid inflightTable.io.lookupSourceId := io.coalResp.bits.source io.coalResp.ready := true.B // FIXME, see sw model implementation - assert( - !((io.coalReqValid === true.B) && (io.coalResp.valid === true.B) && - (io.newEntry.source === io.coalResp.bits.source)), - "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled" - ) - // Un-coalescing logic // def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {