Distinguish time-coalescing window from request queue depth
This commit is contained in:
@@ -62,6 +62,8 @@ case class CoalescerConfig(
|
|||||||
enable: Boolean, // globally enable or disable coalescing
|
enable: Boolean, // globally enable or disable coalescing
|
||||||
numLanes: Int, // number of lanes (or threads) in a warp
|
numLanes: Int, // number of lanes (or threads) in a warp
|
||||||
reqQueueDepth: Int, // request window per lane
|
reqQueueDepth: Int, // request queue depth per lane
|
||||||
|
timeCoalWindowSize: Int,// maximum number of single-lane, different-time requests that can be coalesced
|
||||||
|
// into a single request
|
||||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||||
addressWidth: Int, // assume <= 32
|
addressWidth: Int, // assume <= 32
|
||||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size. Nominally, this has
|
dataBusWidth: Int, // memory-side downstream TileLink data bus size. Nominally, this has
|
||||||
@@ -107,13 +109,17 @@ case class CoalescerConfig(
|
|||||||
)
|
)
|
||||||
w
|
w
|
||||||
}
|
}
|
||||||
|
require(timeCoalWindowSize <= reqQueueDepth,
|
||||||
|
s"time-coalescing window size (${timeCoalWindowSize}) cannot be larger " +
|
||||||
|
s"than the request queue depth (${reqQueueDepth})")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
object DefaultCoalescerConfig extends CoalescerConfig(
|
object DefaultCoalescerConfig extends CoalescerConfig(
|
||||||
enable = true,
|
enable = true,
|
||||||
numLanes = 4,
|
numLanes = 4,
|
||||||
reqQueueDepth = 1, // 1-deep request queues
|
reqQueueDepth = 2, // 2-deep request queues
|
||||||
|
timeCoalWindowSize = 1,
|
||||||
waitTimeout = 8,
|
waitTimeout = 8,
|
||||||
addressWidth = 24,
|
addressWidth = 24,
|
||||||
dataBusWidth = 4, // if "4": 2^4=16 bytes, 128 bit bus
|
dataBusWidth = 4, // if "4": 2^4=16 bytes, 128 bit bus
|
||||||
@@ -366,10 +372,11 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
|
|||||||
val enq = Vec(config.numLanes, DeqIO(gen.cloneType))
|
val enq = Vec(config.numLanes, DeqIO(gen.cloneType))
|
||||||
val deq = Vec(config.numLanes, EnqIO(gen.cloneType))
|
val deq = Vec(config.numLanes, EnqIO(gen.cloneType))
|
||||||
}
|
}
|
||||||
val invalidate = Input(Valid(Vec(config.numLanes, UInt(entries.W))))
|
// note we're only exposing the time-coalescing window part of the queues
|
||||||
|
val invalidate = Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))))
|
||||||
val coalescable = Input(Vec(config.numLanes, Bool()))
|
val coalescable = Input(Vec(config.numLanes, Bool()))
|
||||||
val mask = Output(Vec(config.numLanes, UInt(entries.W)))
|
val mask = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
|
||||||
val elts = Output(Vec(config.numLanes, Vec(entries, gen)))
|
val windowElts = Output(Vec(config.numLanes, Vec(config.timeCoalWindowSize, gen)))
|
||||||
})
|
})
|
||||||
|
|
||||||
// val eltPrototype = Wire(Valid(gen))
|
// val eltPrototype = Wire(Valid(gen))
|
||||||
@@ -443,9 +450,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
|
|||||||
elt.valid := false.B
|
elt.valid := false.B
|
||||||
} else {
|
} else {
|
||||||
elt.bits := elts(i)(j + 1).bits
|
elt.bits := elts(i)(j + 1).bits
|
||||||
elt.valid := elts(i)(
|
if (j == config.timeCoalWindowSize - 1) { // tail of time window
|
||||||
j + 1
|
elt.valid := elts(i)(j + 1).valid
|
||||||
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1))
|
} else {
|
||||||
|
elt.valid := elts(i)(
|
||||||
|
j + 1
|
||||||
|
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// reset dequeue mask when new entries are shifted in
|
// reset dequeue mask when new entries are shifted in
|
||||||
@@ -482,8 +493,8 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
|
|||||||
writePtr.map(_ === writePtr.head).reduce(_ && _)
|
writePtr.map(_ === writePtr.head).reduce(_ && _)
|
||||||
assert(queueInSync, "shift queue lanes are not in sync")
|
assert(queueInSync, "shift queue lanes are not in sync")
|
||||||
|
|
||||||
io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt)
|
io.mask := elts.map(lane => VecInit(lane.map(_.valid).slice(0, config.timeCoalWindowSize)).asUInt)
|
||||||
io.elts := elts.map(x => VecInit(x.map(_.bits)))
|
io.windowElts := elts.map(lane => VecInit(lane.map(_.bits).slice(0, config.timeCoalWindowSize)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main coalescing logic that finds which lanes with valid requests can be coalesced
|
// Main coalescing logic that finds which lanes with valid requests can be coalesced
|
||||||
@@ -502,11 +513,11 @@ class MonoCoalescer(
|
|||||||
val results = Output(new Bundle {
|
val results = Output(new Bundle {
|
||||||
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
||||||
val baseAddr = Output(UInt(config.addressWidth.W))
|
val baseAddr = Output(UInt(config.addressWidth.W))
|
||||||
val matchOH = Output(Vec(config.numLanes, UInt(config.reqQueueDepth.W)))
|
val matchOH = Output(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
|
||||||
// number of entries matched with this leader lane's head.
|
// number of entries matched with this leader lane's head.
|
||||||
// maximum is numLanes * queueDepth
|
// maximum is numLanes * timeCoalWindowSize
|
||||||
val matchCount =
|
val matchCount =
|
||||||
Output(UInt(log2Ceil(config.numLanes * config.reqQueueDepth + 1).W))
|
Output(UInt(log2Ceil(config.numLanes * config.timeCoalWindowSize + 1).W))
|
||||||
val coverageHits =
|
val coverageHits =
|
||||||
Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W))
|
Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W))
|
||||||
val canCoalesce = Output(Vec(config.numLanes, Bool()))
|
val canCoalesce = Output(Vec(config.numLanes, Bool()))
|
||||||
@@ -518,7 +529,7 @@ class MonoCoalescer(
|
|||||||
// Combinational logic to drive output from window contents.
|
// Combinational logic to drive output from window contents.
|
||||||
// The leader lanes only compare their heads against all entries of the
|
// The leader lanes only compare their heads against all entries of the
|
||||||
// follower lanes.
|
// follower lanes.
|
||||||
val leaders = io.window.elts.map(_.head)
|
val leaders = io.window.windowElts.map(_.head)
|
||||||
val leadersValid = io.window.mask.map(_.asBools.head)
|
val leadersValid = io.window.mask.map(_.asBools.head)
|
||||||
|
|
||||||
def printQueueHeads = {
|
def printQueueHeads = {
|
||||||
@@ -549,7 +560,7 @@ class MonoCoalescer(
|
|||||||
// dimensions: (leader lane, follower lane, follower entry)
|
// dimensions: (leader lane, follower lane, follower entry)
|
||||||
val matchTablePerLane = (leaders zip leadersValid).map {
|
val matchTablePerLane = (leaders zip leadersValid).map {
|
||||||
case (leader, leaderValid) =>
|
case (leader, leaderValid) =>
|
||||||
(io.window.elts zip io.window.mask).map {
|
(io.window.windowElts zip io.window.mask).map {
|
||||||
case (followers, followerValids) =>
|
case (followers, followerValids) =>
|
||||||
// compare leader's head against follower's every queue entry
|
// compare leader's head against every entry in the follower's time-coalescing window
|
||||||
(followers zip followerValids.asBools).map {
|
(followers zip followerValids.asBools).map {
|
||||||
@@ -603,7 +614,7 @@ class MonoCoalescer(
|
|||||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth)
|
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth)
|
||||||
// 2-D table flattened to 1-D
|
// 2-D table flattened to 1-D
|
||||||
val offsets =
|
val offsets =
|
||||||
io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address)))
|
io.window.windowElts.flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||||
val valids = chosenMatches.flatMap(_.asBools)
|
val valids = chosenMatches.flatMap(_.asBools)
|
||||||
// indicates for each word in the coalesced chunk whether it is accessed by
|
// indicates for each word in the coalesced chunk whether it is accessed by
|
||||||
// any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four
|
// any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four
|
||||||
@@ -650,7 +661,7 @@ class MultiCoalescer(
|
|||||||
queueT: CoalShiftQueue[NonCoalescedRequest],
|
queueT: CoalShiftQueue[NonCoalescedRequest],
|
||||||
coalReqT: CoalescedRequest
|
coalReqT: CoalescedRequest
|
||||||
) extends Module {
|
) extends Module {
|
||||||
val invalidateT = Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W)))
|
val invalidateT = Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W)))
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
// coalescing window, connected to the contents of the request queues
|
// coalescing window, connected to the contents of the request queues
|
||||||
val window = Input(queueT.io.cloneType)
|
val window = Input(queueT.io.cloneType)
|
||||||
@@ -721,12 +732,12 @@ class MultiCoalescer(
|
|||||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
|
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
|
||||||
|
|
||||||
// flatten requests and matches
|
// flatten requests and matches
|
||||||
val flatReqs = io.window.elts.flatten
|
val flatReqs = io.window.windowElts.flatten
|
||||||
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
|
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
|
||||||
|
|
||||||
// check for word alignment in addresses
|
// check for word alignment in addresses
|
||||||
assert(
|
assert(
|
||||||
io.window.elts
|
io.window.windowElts
|
||||||
.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U))
|
.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U))
|
||||||
.zip(io.window.mask.flatMap(_.asBools))
|
.zip(io.window.mask.flatMap(_.asBools))
|
||||||
.map { case (aligned, valid) => (!valid) || aligned }
|
.map { case (aligned, valid) => (!valid) || aligned }
|
||||||
@@ -783,7 +794,7 @@ class MultiCoalescer(
|
|||||||
io.coalReq.bits.data := data.asUInt
|
io.coalReq.bits.data := data.asUInt
|
||||||
io.coalReq.bits.size := chosenSize
|
io.coalReq.bits.size := chosenSize
|
||||||
io.coalReq.bits.address := chosenBundle.baseAddr
|
io.coalReq.bits.address := chosenBundle.baseAddr
|
||||||
io.coalReq.bits.op := io.window.elts(chosenBundle.leaderIdx).head.op
|
io.coalReq.bits.op := io.window.windowElts(chosenBundle.leaderIdx).head.op
|
||||||
io.coalReq.valid := coalesceValid
|
io.coalReq.valid := coalesceValid
|
||||||
|
|
||||||
io.invalidate.bits := chosenBundle.matchOH
|
io.invalidate.bits := chosenBundle.matchOH
|
||||||
@@ -850,6 +861,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
println(s" numLanes: ${config.numLanes}")
|
println(s" numLanes: ${config.numLanes}")
|
||||||
println(s" wordSizeInBytes: ${config.wordSizeInBytes}")
|
println(s" wordSizeInBytes: ${config.wordSizeInBytes}")
|
||||||
println(s" coalLogSizes: ${config.coalLogSizes}")
|
println(s" coalLogSizes: ${config.coalLogSizes}")
|
||||||
|
println(s" timeCoalWindowSize: ${config.timeCoalWindowSize}")
|
||||||
println(s" numOldSrcIds: ${config.numOldSrcIds}")
|
println(s" numOldSrcIds: ${config.numOldSrcIds}")
|
||||||
println(s" numNewSrcIds: ${config.numNewSrcIds}")
|
println(s" numNewSrcIds: ${config.numNewSrcIds}")
|
||||||
println(s" reqQueueDepth: ${config.reqQueueDepth}")
|
println(s" reqQueueDepth: ${config.reqQueueDepth}")
|
||||||
@@ -976,7 +988,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
// Connect coalesced request to be recorded in the uncoalescer table.
|
// Connect coalesced request to be recorded in the uncoalescer table.
|
||||||
inflightTable.io.inCoalReq <> coalSourceGen.io.outReq
|
inflightTable.io.inCoalReq <> coalSourceGen.io.outReq
|
||||||
inflightTable.io.invalidate := coalescer.io.invalidate
|
inflightTable.io.invalidate := coalescer.io.invalidate
|
||||||
inflightTable.io.windowElts := reqQueues.io.elts
|
inflightTable.io.windowElts := reqQueues.io.windowElts
|
||||||
|
|
||||||
// This is the final coalesced request.
|
// This is the final coalesced request.
|
||||||
val coalReq = inflightTable.io.outCoalReq
|
val coalReq = inflightTable.io.outCoalReq
|
||||||
@@ -1016,7 +1028,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
|
|
||||||
// The maximum number of requests from a single lane that can go into a
|
// The maximum number of requests from a single lane that can go into a
|
||||||
// coalesced request.
|
// coalesced request.
|
||||||
val numPerLaneReqs = config.reqQueueDepth
|
val numPerLaneReqs = config.timeCoalWindowSize
|
||||||
|
|
||||||
// FIXME: no need to contain maxCoalLogSize data
|
// FIXME: no need to contain maxCoalLogSize data
|
||||||
val respQueueEntryT = new Response(
|
val respQueueEntryT = new Response(
|
||||||
@@ -1117,11 +1129,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
// Connect uncoalescer results back into response queue
|
// Connect uncoalescer results back into response queue
|
||||||
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach {
|
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach {
|
||||||
case ((q, sameLaneUncoalResps), lane) =>
|
case ((q, sameLaneUncoalResps), lane) =>
|
||||||
// reqQueueDepth here is the maximum number of same-lane, different-time
|
// timeCoalWindowSize is the maximum number of same-lane, different-time
|
||||||
// requests that can go into a single coalesced response. We need to have
|
// requests that can go into a single coalesced response. We need to
|
||||||
// that many enq ports to not backpressure the uncoalescer.
|
// have that many enq ports to not backpressure the uncoalescer.
|
||||||
require(
|
require(
|
||||||
q.io.enq.length == config.reqQueueDepth + respQueueUncoalPortOffset,
|
q.io.enq.length == config.timeCoalWindowSize + respQueueUncoalPortOffset,
|
||||||
s"wrong number of enq ports for MultiPort response queue"
|
s"wrong number of enq ports for MultiPort response queue"
|
||||||
)
|
)
|
||||||
// slice the ports reserved for uncoalesced response
|
// slice the ports reserved for uncoalesced response
|
||||||
@@ -1161,10 +1173,7 @@ class Uncoalescer(
|
|||||||
val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
|
val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
|
||||||
val respQueueIO = Vec(
|
val respQueueIO = Vec(
|
||||||
config.numLanes,
|
config.numLanes,
|
||||||
// reqQueueDepth because if we're doing time-coalescing, that's the
|
Vec(config.timeCoalWindowSize, Decoupled(new NonCoalescedResponse(config)))
|
||||||
// maximum number of same-lane, different-time requests that can go into
|
|
||||||
// a single coalesced request.
|
|
||||||
Vec(config.reqQueueDepth, Decoupled(new NonCoalescedResponse(config)))
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -1269,7 +1278,7 @@ class InFlightTable(
|
|||||||
config.maxCoalLogSize - config.wordSizeWidth // assumes word offset
|
config.maxCoalLogSize - config.wordSizeWidth // assumes word offset
|
||||||
val entryT = new InFlightTableEntry(
|
val entryT = new InFlightTableEntry(
|
||||||
config.numLanes,
|
config.numLanes,
|
||||||
config.reqQueueDepth,
|
config.timeCoalWindowSize,
|
||||||
log2Ceil(config.numOldSrcIds),
|
log2Ceil(config.numOldSrcIds),
|
||||||
log2Ceil(config.numNewSrcIds),
|
log2Ceil(config.numNewSrcIds),
|
||||||
config.maxCoalLogSize, // FIXME: offsetBits?
|
config.maxCoalLogSize, // FIXME: offsetBits?
|
||||||
@@ -1289,12 +1298,12 @@ class InFlightTable(
|
|||||||
// invalidate signal coming out of coalescer. Needed to generate new entry
|
// invalidate signal coming out of coalescer. Needed to generate new entry
|
||||||
// for the table.
|
// for the table.
|
||||||
val invalidate =
|
val invalidate =
|
||||||
Input(Valid(Vec(config.numLanes, UInt(config.reqQueueDepth.W))))
|
Input(Valid(Vec(config.numLanes, UInt(config.timeCoalWindowSize.W))))
|
||||||
// coalescing window, connected to the contents of the request queues.
|
// coalescing window, connected to the contents of the request queues.
|
||||||
// Need this to generate new entry for the table.
|
// Need this to generate new entry for the table.
|
||||||
// TODO: duplicate type construction
|
// TODO: duplicate type construction
|
||||||
val windowElts =
|
val windowElts =
|
||||||
Input(Vec(config.numLanes, Vec(config.reqQueueDepth, nonCoalReqT)))
|
Input(Vec(config.numLanes, Vec(config.timeCoalWindowSize, nonCoalReqT)))
|
||||||
// InflightTable simply passes through the inCoalReq to outCoalReq, only snooping
|
// InflightTable simply passes through the inCoalReq to outCoalReq, only snooping
|
||||||
// on its data to record what's necessary.
|
// on its data to record what's necessary.
|
||||||
val outCoalReq = Decoupled(coalReqT)
|
val outCoalReq = Decoupled(coalReqT)
|
||||||
|
|||||||
Reference in New Issue
Block a user