From 75d51e3d1da0cf7f3b4ba15f3df3766ab1c1fcb0 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 22 Jan 2024 14:39:34 -0800 Subject: [PATCH] Distinguish time-coalescing window from request queue depth --- .../scala/radiance/memory/Coalescing.scala | 71 +++++++++++-------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/src/main/scala/radiance/memory/Coalescing.scala b/src/main/scala/radiance/memory/Coalescing.scala index a3a4432..8600a59 100644 --- a/src/main/scala/radiance/memory/Coalescing.scala +++ b/src/main/scala/radiance/memory/Coalescing.scala @@ -62,6 +62,8 @@ case class CoalescerConfig( enable: Boolean, // globally enable or disable coalescing numLanes: Int, // number of lanes (or threads) in a warp reqQueueDepth: Int, // request window per lane + timeCoalWindowSize: Int,// maximum single-lane, different-time requests that can be coalesced + // into a single request waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane addressWidth: Int, // assume <= 32 dataBusWidth: Int, // memory-side downstream TileLink data bus size. 
Nominally, this has @@ -107,13 +109,17 @@ case class CoalescerConfig( ) w } + require(timeCoalWindowSize <= reqQueueDepth, + s"time-coalescing window size (${timeCoalWindowSize}) cannot be larger " + + s"than the request queue depth (${reqQueueDepth})") } object DefaultCoalescerConfig extends CoalescerConfig( enable = true, numLanes = 4, - reqQueueDepth = 1, // 1-deep request queues + reqQueueDepth = 2, // 2-deep request queues + timeCoalWindowSize = 1, waitTimeout = 8, addressWidth = 24, dataBusWidth = 4, // if "4": 2^4=16 bytes, 128 bit bus @@ -366,10 +372,11 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) val enq = Vec(config.numLanes, DeqIO(gen.cloneType)) val deq = Vec(config.numLanes, EnqIO(gen.cloneType)) } - val invalidate = Input(Valid(Vec(config.numLanes, UInt(entries.W)))) + // note we're only exposing the time-coalescing window part of the queues + val invalidate = Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))) val coalescable = Input(Vec(config.numLanes, Bool())) - val mask = Output(Vec(config.numLanes, UInt(entries.W))) - val elts = Output(Vec(config.numLanes, Vec(entries, gen))) + val mask = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))) + val windowElts = Output(Vec(config.numLanes, Vec(config.timeCoalWindowSize, gen))) }) // val eltPrototype = Wire(Valid(gen)) @@ -443,9 +450,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) elt.valid := false.B } else { elt.bits := elts(i)(j + 1).bits - elt.valid := elts(i)( - j + 1 - ).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) + if (j >= config.timeCoalWindowSize - 1) { // at or past tail of time window: elts(i)(j + 1) has no invalidate bit + elt.valid := elts(i)(j + 1).valid + } else { + elt.valid := elts(i)( + j + 1 + ).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) + } } } // reset dequeue mask when new entries are shifted in @@ -482,8 +493,8 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: 
CoalescerConfig) writePtr.map(_ === writePtr.head).reduce(_ && _) assert(queueInSync, "shift queue lanes are not in sync") - io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt) - io.elts := elts.map(x => VecInit(x.map(_.bits))) + io.mask := elts.map(lane => VecInit(lane.map(_.valid).slice(0, config.timeCoalWindowSize)).asUInt) + io.windowElts := elts.map(lane => VecInit(lane.map(_.bits).slice(0, config.timeCoalWindowSize))) } // Main coalescing logic that finds which lanes with valid requests can be coalesced @@ -502,11 +513,11 @@ class MonoCoalescer( val results = Output(new Bundle { val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val baseAddr = Output(UInt(config.addressWidth.W)) - val matchOH = Output(Vec(config.numLanes, UInt(config.reqQueueDepth.W))) + val matchOH = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))) // number of entries matched with this leader lane's head. // maximum is numLanes * queueDepth val matchCount = - Output(UInt(log2Ceil(config.numLanes * config.reqQueueDepth + 1).W)) + Output(UInt(log2Ceil(config.numLanes * config.timeCoalWindowSize + 1).W)) val coverageHits = Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) val canCoalesce = Output(Vec(config.numLanes, Bool())) @@ -518,7 +529,7 @@ class MonoCoalescer( // Combinational logic to drive output from window contents. // The leader lanes only compare their heads against all entries of the // follower lanes. 
- val leaders = io.window.elts.map(_.head) + val leaders = io.window.windowElts.map(_.head) val leadersValid = io.window.mask.map(_.asBools.head) def printQueueHeads = { @@ -549,7 +560,7 @@ class MonoCoalescer( // dimensions: (leader lane, follower lane, follower entry) val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) => - (io.window.elts zip io.window.mask).map { + (io.window.windowElts zip io.window.mask).map { case (followers, followerValids) => // compare leader's head against follower's every queue entry (followers zip followerValids.asBools).map { @@ -603,7 +614,7 @@ class MonoCoalescer( def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth) // 2-D table flattened to 1-D val offsets = - io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) + io.window.windowElts.flatMap(_.map(req => getOffsetSlice(req.address))) val valids = chosenMatches.flatMap(_.asBools) // indicates for each word in the coalesced chunk whether it is accessed by // any of the requests in the queue. e.g. 
if [ 1 1 1 1 ], all of the four @@ -650,7 +661,7 @@ class MultiCoalescer( queueT: CoalShiftQueue[NonCoalescedRequest], coalReqT: CoalescedRequest ) extends Module { - val invalidateT = Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W))) + val invalidateT = Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))) val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues val window = Input(queueT.io.cloneType) @@ -721,12 +732,12 @@ class MultiCoalescer( val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx) // flatten requests and matches - val flatReqs = io.window.elts.flatten + val flatReqs = io.window.windowElts.flatten val flatMatches = chosenBundle.matchOH.flatMap(_.asBools) // check for word alignment in addresses assert( - io.window.elts + io.window.windowElts .flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)) .zip(io.window.mask.flatMap(_.asBools)) .map { case (aligned, valid) => (!valid) || aligned } @@ -783,7 +794,7 @@ class MultiCoalescer( io.coalReq.bits.data := data.asUInt io.coalReq.bits.size := chosenSize io.coalReq.bits.address := chosenBundle.baseAddr - io.coalReq.bits.op := io.window.elts(chosenBundle.leaderIdx).head.op + io.coalReq.bits.op := io.window.windowElts(chosenBundle.leaderIdx).head.op io.coalReq.valid := coalesceValid io.invalidate.bits := chosenBundle.matchOH @@ -850,6 +861,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) println(s" numLanes: ${config.numLanes}") println(s" wordSizeInBytes: ${config.wordSizeInBytes}") println(s" coalLogSizes: ${config.coalLogSizes}") + println(s" timeCoalWindowSize: ${config.timeCoalWindowSize}") println(s" numOldSrcIds: ${config.numOldSrcIds}") println(s" numNewSrcIds: ${config.numNewSrcIds}") println(s" reqQueueDepth: ${config.reqQueueDepth}") @@ -976,7 +988,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // Connect coalesced request to be recorded in the 
uncoalescer table. inflightTable.io.inCoalReq <> coalSourceGen.io.outReq inflightTable.io.invalidate := coalescer.io.invalidate - inflightTable.io.windowElts := reqQueues.io.elts + inflightTable.io.windowElts := reqQueues.io.windowElts // This is the final coalesced request. val coalReq = inflightTable.io.outCoalReq @@ -1016,7 +1028,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // The maximum number of requests from a single lane that can go into a // coalesced request. - val numPerLaneReqs = config.reqQueueDepth + val numPerLaneReqs = config.timeCoalWindowSize // FIXME: no need to contain maxCoalLogSize data val respQueueEntryT = new Response( @@ -1117,11 +1129,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // Connect uncoalescer results back into response queue (respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach { case ((q, sameLaneUncoalResps), lane) => - // reqQueueDepth here is the maximum number of same-lane, different-time - // requests that can go into a single coalesced response. We need to have - // that many enq ports to not backpressure the uncoalescer. + // timeCoalWindowSize is the maximum number of same-lane, different-time + // requests that can go into a single coalesced response. We need to + // have that many enq ports to not backpressure the uncoalescer. require( - q.io.enq.length == config.reqQueueDepth + respQueueUncoalPortOffset, + q.io.enq.length == config.timeCoalWindowSize + respQueueUncoalPortOffset, s"wrong number of enq ports for MultiPort response queue" ) // slice the ports reserved for uncoalesced response @@ -1161,10 +1173,7 @@ class Uncoalescer( val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val respQueueIO = Vec( config.numLanes, - // reqQueueDepth because if we're doing time-coalescing, that's the - // maximum number of same-lane, different-time requests that can go into - // a single coalesced request. 
- Vec(config.reqQueueDepth, Decoupled(new NonCoalescedResponse(config))) + Vec(config.timeCoalWindowSize, Decoupled(new NonCoalescedResponse(config))) ) }) @@ -1269,7 +1278,7 @@ class InFlightTable( config.maxCoalLogSize - config.wordSizeWidth // assumes word offset val entryT = new InFlightTableEntry( config.numLanes, - config.reqQueueDepth, + config.timeCoalWindowSize, log2Ceil(config.numOldSrcIds), log2Ceil(config.numNewSrcIds), config.maxCoalLogSize, // FIXME: offsetBits? @@ -1289,12 +1298,12 @@ class InFlightTable( // invalidate signal coming out of coalescer. Needed to generate new entry // for the table. val invalidate = - Input(Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W)))) + Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))) // coalescing window, connected to the contents of the request queues. // Need this to generate new entry for the table. // TODO: duplicate type construction val windowElts = - Input(Vec(config.numLanes, Vec(config.reqQueueDepth, nonCoalReqT))) + Input(Vec(config.numLanes, Vec(config.timeCoalWindowSize, nonCoalReqT))) // InflightTable simply passes through the inCoalReq to outCoalReq, only snooping // on its data to record what's necessary. val outCoalReq = Decoupled(coalReqT)