Distinguish time-coalescing window from request queue depth

This commit is contained in:
Hansung Kim
2024-01-22 14:39:34 -08:00
parent b2a83c788e
commit 75d51e3d1d

View File

@@ -62,6 +62,8 @@ case class CoalescerConfig(
enable: Boolean, // globally enable or disable coalescing enable: Boolean, // globally enable or disable coalescing
numLanes: Int, // number of lanes (or threads) in a warp numLanes: Int, // number of lanes (or threads) in a warp
reqQueueDepth: Int, // request window per lane reqQueueDepth: Int, // request window per lane
timeCoalWindowSize: Int,// maximum single-lane, different-time requests that can be coalesced
// into a single request
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
addressWidth: Int, // assume <= 32 addressWidth: Int, // assume <= 32
dataBusWidth: Int, // memory-side downstream TileLink data bus size. Nominally, this has dataBusWidth: Int, // memory-side downstream TileLink data bus size. Nominally, this has
@@ -107,13 +109,17 @@ case class CoalescerConfig(
) )
w w
} }
require(timeCoalWindowSize <= reqQueueDepth,
s"time-coalescing window size (${timeCoalWindowSize}) cannot be larger " +
s"than the request queue depth (${reqQueueDepth})")
} }
object DefaultCoalescerConfig extends CoalescerConfig( object DefaultCoalescerConfig extends CoalescerConfig(
enable = true, enable = true,
numLanes = 4, numLanes = 4,
reqQueueDepth = 1, // 1-deep request queues reqQueueDepth = 2, // 2-deep request queues
timeCoalWindowSize = 1,
waitTimeout = 8, waitTimeout = 8,
addressWidth = 24, addressWidth = 24,
dataBusWidth = 4, // if "4": 2^4=16 bytes, 128 bit bus dataBusWidth = 4, // if "4": 2^4=16 bytes, 128 bit bus
@@ -366,10 +372,11 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
val enq = Vec(config.numLanes, DeqIO(gen.cloneType)) val enq = Vec(config.numLanes, DeqIO(gen.cloneType))
val deq = Vec(config.numLanes, EnqIO(gen.cloneType)) val deq = Vec(config.numLanes, EnqIO(gen.cloneType))
} }
val invalidate = Input(Valid(Vec(config.numLanes, UInt(entries.W)))) // note we're only exposing the time-coalescing window part of the queues
val invalidate = Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))))
val coalescable = Input(Vec(config.numLanes, Bool())) val coalescable = Input(Vec(config.numLanes, Bool()))
val mask = Output(Vec(config.numLanes, UInt(entries.W))) val mask = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
val elts = Output(Vec(config.numLanes, Vec(entries, gen))) val windowElts = Output(Vec(config.numLanes, Vec(config.timeCoalWindowSize, gen)))
}) })
// val eltPrototype = Wire(Valid(gen)) // val eltPrototype = Wire(Valid(gen))
@@ -443,9 +450,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
elt.valid := false.B elt.valid := false.B
} else { } else {
elt.bits := elts(i)(j + 1).bits elt.bits := elts(i)(j + 1).bits
elt.valid := elts(i)( if (j == config.timeCoalWindowSize - 1) { // tail of time window
j + 1 elt.valid := elts(i)(j + 1).valid
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) } else {
elt.valid := elts(i)(
j + 1
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1))
}
} }
} }
// reset dequeue mask when new entries are shifted in // reset dequeue mask when new entries are shifted in
@@ -482,8 +493,8 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
writePtr.map(_ === writePtr.head).reduce(_ && _) writePtr.map(_ === writePtr.head).reduce(_ && _)
assert(queueInSync, "shift queue lanes are not in sync") assert(queueInSync, "shift queue lanes are not in sync")
io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt) io.mask := elts.map(lane => VecInit(lane.map(_.valid).slice(0, config.timeCoalWindowSize)).asUInt)
io.elts := elts.map(x => VecInit(x.map(_.bits))) io.windowElts := elts.map(lane => VecInit(lane.map(_.bits).slice(0, config.timeCoalWindowSize)))
} }
// Main coalescing logic that finds which lanes with valid requests can be coalesced // Main coalescing logic that finds which lanes with valid requests can be coalesced
@@ -502,11 +513,11 @@ class MonoCoalescer(
val results = Output(new Bundle { val results = Output(new Bundle {
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
val baseAddr = Output(UInt(config.addressWidth.W)) val baseAddr = Output(UInt(config.addressWidth.W))
val matchOH = Output(Vec(config.numLanes, UInt(config.reqQueueDepth.W))) val matchOH = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
// number of entries matched with this leader lane's head. // number of entries matched with this leader lane's head.
// maximum is numLanes * queueDepth // maximum is numLanes * timeCoalWindowSize
val matchCount = val matchCount =
Output(UInt(log2Ceil(config.numLanes * config.reqQueueDepth + 1).W)) Output(UInt(log2Ceil(config.numLanes * config.timeCoalWindowSize + 1).W))
val coverageHits = val coverageHits =
Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W))
val canCoalesce = Output(Vec(config.numLanes, Bool())) val canCoalesce = Output(Vec(config.numLanes, Bool()))
@@ -518,7 +529,7 @@ class MonoCoalescer(
// Combinational logic to drive output from window contents. // Combinational logic to drive output from window contents.
// The leader lanes only compare their heads against all entries of the // The leader lanes only compare their heads against all entries of the
// follower lanes. // follower lanes.
val leaders = io.window.elts.map(_.head) val leaders = io.window.windowElts.map(_.head)
val leadersValid = io.window.mask.map(_.asBools.head) val leadersValid = io.window.mask.map(_.asBools.head)
def printQueueHeads = { def printQueueHeads = {
@@ -549,7 +560,7 @@ class MonoCoalescer(
// dimensions: (leader lane, follower lane, follower entry) // dimensions: (leader lane, follower lane, follower entry)
val matchTablePerLane = (leaders zip leadersValid).map { val matchTablePerLane = (leaders zip leadersValid).map {
case (leader, leaderValid) => case (leader, leaderValid) =>
(io.window.elts zip io.window.mask).map { (io.window.windowElts zip io.window.mask).map {
case (followers, followerValids) => case (followers, followerValids) =>
// compare leader's head against follower's every queue entry // compare leader's head against follower's every queue entry
(followers zip followerValids.asBools).map { (followers zip followerValids.asBools).map {
@@ -603,7 +614,7 @@ class MonoCoalescer(
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth) def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth)
// 2-D table flattened to 1-D // 2-D table flattened to 1-D
val offsets = val offsets =
io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) io.window.windowElts.flatMap(_.map(req => getOffsetSlice(req.address)))
val valids = chosenMatches.flatMap(_.asBools) val valids = chosenMatches.flatMap(_.asBools)
// indicates for each word in the coalesced chunk whether it is accessed by // indicates for each word in the coalesced chunk whether it is accessed by
// any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four // any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four
@@ -650,7 +661,7 @@ class MultiCoalescer(
queueT: CoalShiftQueue[NonCoalescedRequest], queueT: CoalShiftQueue[NonCoalescedRequest],
coalReqT: CoalescedRequest coalReqT: CoalescedRequest
) extends Module { ) extends Module {
val invalidateT = Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W))) val invalidateT = Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
val io = IO(new Bundle { val io = IO(new Bundle {
// coalescing window, connected to the contents of the request queues // coalescing window, connected to the contents of the request queues
val window = Input(queueT.io.cloneType) val window = Input(queueT.io.cloneType)
@@ -721,12 +732,12 @@ class MultiCoalescer(
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx) val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
// flatten requests and matches // flatten requests and matches
val flatReqs = io.window.elts.flatten val flatReqs = io.window.windowElts.flatten
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools) val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
// check for word alignment in addresses // check for word alignment in addresses
assert( assert(
io.window.elts io.window.windowElts
.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)) .flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U))
.zip(io.window.mask.flatMap(_.asBools)) .zip(io.window.mask.flatMap(_.asBools))
.map { case (aligned, valid) => (!valid) || aligned } .map { case (aligned, valid) => (!valid) || aligned }
@@ -783,7 +794,7 @@ class MultiCoalescer(
io.coalReq.bits.data := data.asUInt io.coalReq.bits.data := data.asUInt
io.coalReq.bits.size := chosenSize io.coalReq.bits.size := chosenSize
io.coalReq.bits.address := chosenBundle.baseAddr io.coalReq.bits.address := chosenBundle.baseAddr
io.coalReq.bits.op := io.window.elts(chosenBundle.leaderIdx).head.op io.coalReq.bits.op := io.window.windowElts(chosenBundle.leaderIdx).head.op
io.coalReq.valid := coalesceValid io.coalReq.valid := coalesceValid
io.invalidate.bits := chosenBundle.matchOH io.invalidate.bits := chosenBundle.matchOH
@@ -850,6 +861,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
println(s" numLanes: ${config.numLanes}") println(s" numLanes: ${config.numLanes}")
println(s" wordSizeInBytes: ${config.wordSizeInBytes}") println(s" wordSizeInBytes: ${config.wordSizeInBytes}")
println(s" coalLogSizes: ${config.coalLogSizes}") println(s" coalLogSizes: ${config.coalLogSizes}")
println(s" timeCoalWindowSize: ${config.timeCoalWindowSize}")
println(s" numOldSrcIds: ${config.numOldSrcIds}") println(s" numOldSrcIds: ${config.numOldSrcIds}")
println(s" numNewSrcIds: ${config.numNewSrcIds}") println(s" numNewSrcIds: ${config.numNewSrcIds}")
println(s" reqQueueDepth: ${config.reqQueueDepth}") println(s" reqQueueDepth: ${config.reqQueueDepth}")
@@ -976,7 +988,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
// Connect coalesced request to be recorded in the uncoalescer table. // Connect coalesced request to be recorded in the uncoalescer table.
inflightTable.io.inCoalReq <> coalSourceGen.io.outReq inflightTable.io.inCoalReq <> coalSourceGen.io.outReq
inflightTable.io.invalidate := coalescer.io.invalidate inflightTable.io.invalidate := coalescer.io.invalidate
inflightTable.io.windowElts := reqQueues.io.elts inflightTable.io.windowElts := reqQueues.io.windowElts
// This is the final coalesced request. // This is the final coalesced request.
val coalReq = inflightTable.io.outCoalReq val coalReq = inflightTable.io.outCoalReq
@@ -1016,7 +1028,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
// The maximum number of requests from a single lane that can go into a // The maximum number of requests from a single lane that can go into a
// coalesced request. // coalesced request.
val numPerLaneReqs = config.reqQueueDepth val numPerLaneReqs = config.timeCoalWindowSize
// FIXME: no need to contain maxCoalLogSize data // FIXME: no need to contain maxCoalLogSize data
val respQueueEntryT = new Response( val respQueueEntryT = new Response(
@@ -1117,11 +1129,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
// Connect uncoalescer results back into response queue // Connect uncoalescer results back into response queue
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach { (respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach {
case ((q, sameLaneUncoalResps), lane) => case ((q, sameLaneUncoalResps), lane) =>
// reqQueueDepth here is the maximum number of same-lane, different-time // timeCoalWindowSize is the maximum number of same-lane, different-time
// requests that can go into a single coalesced response. We need to have // requests that can go into a single coalesced response. We need to
// that many enq ports to not backpressure the uncoalescer. // have that many enq ports to not backpressure the uncoalescer.
require( require(
q.io.enq.length == config.reqQueueDepth + respQueueUncoalPortOffset, q.io.enq.length == config.timeCoalWindowSize + respQueueUncoalPortOffset,
s"wrong number of enq ports for MultiPort response queue" s"wrong number of enq ports for MultiPort response queue"
) )
// slice the ports reserved for uncoalesced response // slice the ports reserved for uncoalesced response
@@ -1161,10 +1173,7 @@ class Uncoalescer(
val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
val respQueueIO = Vec( val respQueueIO = Vec(
config.numLanes, config.numLanes,
// reqQueueDepth because if we're doing time-coalescing, that's the Vec(config.timeCoalWindowSize, Decoupled(new NonCoalescedResponse(config)))
// maximum number of same-lane, different-time requests that can go into
// a single coalesced request.
Vec(config.reqQueueDepth, Decoupled(new NonCoalescedResponse(config)))
) )
}) })
@@ -1269,7 +1278,7 @@ class InFlightTable(
config.maxCoalLogSize - config.wordSizeWidth // assumes word offset config.maxCoalLogSize - config.wordSizeWidth // assumes word offset
val entryT = new InFlightTableEntry( val entryT = new InFlightTableEntry(
config.numLanes, config.numLanes,
config.reqQueueDepth, config.timeCoalWindowSize,
log2Ceil(config.numOldSrcIds), log2Ceil(config.numOldSrcIds),
log2Ceil(config.numNewSrcIds), log2Ceil(config.numNewSrcIds),
config.maxCoalLogSize, // FIXME: offsetBits? config.maxCoalLogSize, // FIXME: offsetBits?
@@ -1289,12 +1298,12 @@ class InFlightTable(
// invalidate signal coming out of coalescer. Needed to generate new entry // invalidate signal coming out of coalescer. Needed to generate new entry
// for the table. // for the table.
val invalidate = val invalidate =
Input(Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W)))) Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))))
// coalescing window, connected to the contents of the request queues. // coalescing window, connected to the contents of the request queues.
// Need this to generate new entry for the table. // Need this to generate new entry for the table.
// TODO: duplicate type construction // TODO: duplicate type construction
val windowElts = val windowElts =
Input(Vec(config.numLanes, Vec(config.reqQueueDepth, nonCoalReqT))) Input(Vec(config.numLanes, Vec(config.timeCoalWindowSize, nonCoalReqT)))
// InflightTable simply passes through the inCoalReq to outCoalReq, only snooping // InflightTable simply passes through the inCoalReq to outCoalReq, only snooping
// on its data to record what's necessary. // on its data to record what's necessary.
val outCoalReq = Decoupled(coalReqT) val outCoalReq = Decoupled(coalReqT)