Put a pipeline stage at uncoalescer
... to cut timing after inflight table lookup & before splitting/enqueueing logic.
This commit is contained in:
@@ -954,7 +954,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
||||
// This is the final coalesced request.
|
||||
val coalReq = inflightTable.io.outCoalReq
|
||||
// downstream backpressure on the coalesced edge
|
||||
// TODO: custom <>?
|
||||
// @cleanup: custom <>?
|
||||
inflightTable.io.outCoalReq.ready := tlCoal.a.ready
|
||||
tlCoal.a.valid := coalReq.valid
|
||||
val (legal, tlBits) = coalReq.bits.toTLA(edgeCoal)
|
||||
@@ -1078,9 +1078,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
||||
|
||||
// Connect lookup result from InflightTable
|
||||
uncoalescer.io.inflightLookup <> inflightTable.io.lookup
|
||||
// InflightTable IO: Look up the table with incoming coalesced responses
|
||||
// @cleanup: this should be done inside uncoalescer
|
||||
inflightTable.io.lookupSourceId := tlCoal.d.bits.source
|
||||
// Look up the inflight table with incoming coalesced responses
|
||||
// @cleanup: would be cleaner if inflightTable lookup is contained inside
|
||||
// uncoalescer
|
||||
inflightTable.io.lookupSourceId := coalSourceGen.io.inResp.bits.source
|
||||
|
||||
// Connect uncoalescer results back into response queue
|
||||
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach
|
||||
@@ -1132,17 +1133,6 @@ class Uncoalescer(
|
||||
)
|
||||
})
|
||||
|
||||
// Lookup table as soon as a coalesced response fires
|
||||
// @perf: might result in long ready chain?
|
||||
io.inflightLookup.ready := io.coalResp.fire
|
||||
|
||||
// Only accept a coalesced response when all enq ports of the response queue
|
||||
// are ready. This is necessary because the uncoalescing logic is purely
|
||||
// combinational and produces all the split responses in the same cycle,
|
||||
// so it must be guaranteed that all of them have somewhere to go.
|
||||
val allRespQueueEnqReady = io.respQueueIO.map(_.map(_.ready).reduce(_ && _)).reduce(_ && _)
|
||||
io.coalResp.ready := allRespQueueEnqReady
|
||||
|
||||
// Un-coalescing logic
|
||||
//
|
||||
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
||||
@@ -1165,32 +1155,59 @@ class Uncoalescer(
|
||||
chunks(offset) // MUX
|
||||
}
|
||||
|
||||
// Un-coalesce responses back to individual lanes
|
||||
// Connect uncoalesced results back into each lane's response queue
|
||||
val foundRow = io.inflightLookup.bits
|
||||
(foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, enqIOs), lane) =>
|
||||
foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) =>
|
||||
// Pipeline registers for the inflight table lookup result and for the
|
||||
// coalesced response itself. We cut timing here because we expect the table
|
||||
// lookup to be on a long path.
|
||||
val coalRespPipeReg = Module(new Queue(chiselTypeOf(io.coalResp.bits), 1, pipe = true))
|
||||
coalRespPipeReg.io.enq <> io.coalResp
|
||||
val tablePipeReg = Module(new Queue(chiselTypeOf(io.inflightLookup.bits), 1, pipe = true))
|
||||
tablePipeReg.io.enq <> io.inflightLookup
|
||||
tablePipeReg.io.enq.valid := io.inflightLookup.fire
|
||||
|
||||
// inflightTable performs the lookup as soon as ready is asserted, assuming
|
||||
// that io.lookupSourceId is valid. We therefore assert ready only when both
|
||||
// conditions hold: the pipeline register has space, and a coalesced response
|
||||
// fires on the channel.
|
||||
io.inflightLookup.ready := tablePipeReg.io.enq.ready && io.coalResp.fire
|
||||
|
||||
// Only proceed with uncoalescing when all enq ports of the response queue
|
||||
// are ready. This is necessary because the uncoalescing logic is purely
|
||||
// combinational and produces all the split responses in the same cycle, so
|
||||
// it must be guaranteed that all of them have somewhere to go.
|
||||
val allRespQueueEnqReady = io.respQueueIO.map(_.map(_.ready).reduce(_ && _)).reduce(_ && _)
|
||||
tablePipeReg.io.deq.ready := allRespQueueEnqReady
|
||||
coalRespPipeReg.io.deq.ready := allRespQueueEnqReady
|
||||
|
||||
assert(tablePipeReg.io.enq.fire === coalRespPipeReg.io.enq.fire,
|
||||
"enqueue timing for uncoalescer pipeline registers out-of-sync!")
|
||||
assert(tablePipeReg.io.deq.fire === coalRespPipeReg.io.deq.fire,
|
||||
"dequeue timing for uncoalescer pipeline registers out-of-sync!")
|
||||
|
||||
// Un-coalesce responses back to individual lanes. Connect uncoalesced
|
||||
// results back into each lane's response queue.
|
||||
val tableRow = tablePipeReg.io.deq
|
||||
(io.respQueueIO zip tableRow.bits.lanes).zipWithIndex.foreach { case ((enqIOs, lane), laneNum) =>
|
||||
lane.reqs.zipWithIndex.foreach { case (req, depth) =>
|
||||
val enqIO = enqIOs(depth)
|
||||
// spatial-only coalescing: only looking at 0th srcId entry
|
||||
enqIO.valid := false.B
|
||||
enqIO.bits := DontCare
|
||||
// debug
|
||||
// when (resp.valid) {
|
||||
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
||||
// printf(s"${i}-th uncoalesced response came back from lane ${laneNum}\n")
|
||||
// }
|
||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||
|
||||
when(io.inflightLookup.valid && foundReq.valid) {
|
||||
enqIO.valid := io.coalResp.fire && foundReq.valid
|
||||
enqIO.bits.op := foundReq.op
|
||||
enqIO.bits.source := foundReq.source
|
||||
val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum)
|
||||
when(tableRow.valid && req.valid) {
|
||||
enqIO.valid := tableRow.fire && req.valid
|
||||
enqIO.bits.op := req.op
|
||||
enqIO.bits.source := req.source
|
||||
val logSize = tableRow.bits.sizeEnumT.enumToLogSize(req.sizeEnum)
|
||||
enqIO.bits.size := logSize
|
||||
enqIO.bits.data :=
|
||||
getCoalescedDataChunk(
|
||||
io.coalResp.bits.data,
|
||||
io.coalResp.bits.data.getWidth,
|
||||
foundReq.offset,
|
||||
coalRespPipeReg.io.deq.bits.data,
|
||||
coalRespPipeReg.io.deq.bits.data.getWidth,
|
||||
req.offset,
|
||||
logSize
|
||||
)
|
||||
// is this necessary?
|
||||
@@ -1219,16 +1236,9 @@ class InFlightTable(
|
||||
config.maxCoalLogSize, // FIXME: offsetBits?
|
||||
config.sizeEnum
|
||||
)
|
||||
|
||||
val entries = config.numNewSrcIds
|
||||
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
||||
|
||||
println(s"CoalescingUnit InFlightTable config: {")
|
||||
println(s" sourceWidth: ${sourceWidth}")
|
||||
println(s" offsetBits: ${offsetBits}")
|
||||
println(s" sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
||||
println(s"}")
|
||||
|
||||
val io = IO(new Bundle {
|
||||
// Enqueue IO
|
||||
//
|
||||
@@ -1258,6 +1268,12 @@ class InFlightTable(
|
||||
val lookupSourceId = Input(UInt(sourceWidth.W))
|
||||
})
|
||||
|
||||
println(s"CoalescingUnit InFlightTable config: {")
|
||||
println(s" sourceWidth: ${sourceWidth}")
|
||||
println(s" offsetBits: ${offsetBits}")
|
||||
println(s" sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
||||
println(s"}")
|
||||
|
||||
val table = Mem(
|
||||
entries,
|
||||
new Bundle {
|
||||
@@ -1339,12 +1355,14 @@ class InFlightTable(
|
||||
// Lookup logic
|
||||
io.lookup.valid := table(io.lookupSourceId).valid
|
||||
io.lookup.bits := table(io.lookupSourceId).bits
|
||||
// Dequeue as soon as lookup succeeds
|
||||
when(io.lookup.fire) {
|
||||
// every lookup to the table should succeed as the request should have
|
||||
// gotten recorded earlier than the response
|
||||
// every lookup to the table should succeed as the request should have
|
||||
// gotten recorded earlier than the response
|
||||
when(io.lookup.ready) {
|
||||
assert(table(io.lookupSourceId).valid === true.B,
|
||||
"table lookup with a valid sourceId failed")
|
||||
}
|
||||
// Dequeue as soon as lookup succeeds
|
||||
when(io.lookup.fire) {
|
||||
table(io.lookupSourceId).valid := false.B
|
||||
}
|
||||
assert(
|
||||
|
||||
Reference in New Issue
Block a user