Put a pipeline stage in the uncoalescer
... to cut timing after the inflight table lookup and before the splitting/enqueueing logic.
This commit is contained in:
@@ -954,7 +954,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
// This is the final coalesced request.
|
// This is the final coalesced request.
|
||||||
val coalReq = inflightTable.io.outCoalReq
|
val coalReq = inflightTable.io.outCoalReq
|
||||||
// downstream backpressure on the coalesced edge
|
// downstream backpressure on the coalesced edge
|
||||||
// TODO: custom <>?
|
// @cleanup: custom <>?
|
||||||
inflightTable.io.outCoalReq.ready := tlCoal.a.ready
|
inflightTable.io.outCoalReq.ready := tlCoal.a.ready
|
||||||
tlCoal.a.valid := coalReq.valid
|
tlCoal.a.valid := coalReq.valid
|
||||||
val (legal, tlBits) = coalReq.bits.toTLA(edgeCoal)
|
val (legal, tlBits) = coalReq.bits.toTLA(edgeCoal)
|
||||||
@@ -1078,9 +1078,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
|
|||||||
|
|
||||||
// Connect lookup result from InflightTable
|
// Connect lookup result from InflightTable
|
||||||
uncoalescer.io.inflightLookup <> inflightTable.io.lookup
|
uncoalescer.io.inflightLookup <> inflightTable.io.lookup
|
||||||
// InflightTable IO: Look up the table with incoming coalesced responses
|
// Look up the inflight table with incoming coalesced responses
|
||||||
// @cleanup: this should be done inside uncoalescer
|
// @cleanup: would be cleaner if the inflightTable lookup were contained inside
|
||||||
inflightTable.io.lookupSourceId := tlCoal.d.bits.source
|
// the uncoalescer
|
||||||
|
inflightTable.io.lookupSourceId := coalSourceGen.io.inResp.bits.source
|
||||||
|
|
||||||
// Connect uncoalescer results back into response queue
|
// Connect uncoalescer results back into response queue
|
||||||
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach
|
(respQueues zip uncoalescer.io.respQueueIO).zipWithIndex.foreach
|
||||||
@@ -1132,17 +1133,6 @@ class Uncoalescer(
|
|||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Lookup table as soon as a coalesced response fires
|
|
||||||
// @perf: might result in long ready chain?
|
|
||||||
io.inflightLookup.ready := io.coalResp.fire
|
|
||||||
|
|
||||||
// Only accept coalesced response when all enq ports of the response queue
|
|
||||||
// are ready. This is necessary because uncoalescing logic is a
|
|
||||||
// combinational logic that produces all the split responses at the same
|
|
||||||
// cycle, so it needs to be guaranteed that all of them has somewhere to go.
|
|
||||||
val allRespQueueEnqReady = io.respQueueIO.map(_.map(_.ready).reduce(_ && _)).reduce(_ && _)
|
|
||||||
io.coalResp.ready := allRespQueueEnqReady
|
|
||||||
|
|
||||||
// Un-coalescing logic
|
// Un-coalescing logic
|
||||||
//
|
//
|
||||||
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
|
||||||
@@ -1165,32 +1155,59 @@ class Uncoalescer(
|
|||||||
chunks(offset) // MUX
|
chunks(offset) // MUX
|
||||||
}
|
}
|
||||||
|
|
||||||
// Un-coalesce responses back to individual lanes
|
// Pipeline registers for the inflight table lookup result and the coalesced
|
||||||
// Connect uncoalesced results back into each lane's response queue
|
// response itself. We cut timing here because the table lookup is expected
|
||||||
val foundRow = io.inflightLookup.bits
|
// to sit on a long timing path.
|
||||||
(foundRow.lanes zip io.respQueueIO).zipWithIndex.foreach { case ((foundLane, enqIOs), lane) =>
|
val coalRespPipeReg = Module(new Queue(chiselTypeOf(io.coalResp.bits), 1, pipe = true))
|
||||||
foundLane.reqs.zipWithIndex.foreach { case (foundReq, depth) =>
|
coalRespPipeReg.io.enq <> io.coalResp
|
||||||
|
val tablePipeReg = Module(new Queue(chiselTypeOf(io.inflightLookup.bits), 1, pipe = true))
|
||||||
|
tablePipeReg.io.enq <> io.inflightLookup
|
||||||
|
tablePipeReg.io.enq.valid := io.inflightLookup.fire
|
||||||
|
|
||||||
|
// inflightTable performs the lookup as soon as ready is asserted, assuming
|
||||||
|
// io.lookupSourceId is valid. We therefore assert ready only when both
|
||||||
|
// conditions hold: there is space in the pipeline register, and a valid
|
||||||
|
// coalesced response is firing on the channel.
|
||||||
|
io.inflightLookup.ready := tablePipeReg.io.enq.ready && io.coalResp.fire
|
||||||
|
|
||||||
|
// Only proceed with uncoalescing when all enq ports of the response queues
|
||||||
|
// are ready. This is necessary because the uncoalescing logic is purely
|
||||||
|
// combinational and produces all the split responses in the same cycle, so
|
||||||
|
// every one of them must be guaranteed to have somewhere to go.
|
||||||
|
val allRespQueueEnqReady = io.respQueueIO.map(_.map(_.ready).reduce(_ && _)).reduce(_ && _)
|
||||||
|
tablePipeReg.io.deq.ready := allRespQueueEnqReady
|
||||||
|
coalRespPipeReg.io.deq.ready := allRespQueueEnqReady
|
||||||
|
|
||||||
|
assert(tablePipeReg.io.enq.fire === coalRespPipeReg.io.enq.fire,
|
||||||
|
"enqueue timing for uncoalescer pipeline registers out-of-sync!")
|
||||||
|
assert(tablePipeReg.io.deq.fire === coalRespPipeReg.io.deq.fire,
|
||||||
|
"dequeue timing for uncoalescer pipeline registers out-of-sync!")
|
||||||
|
|
||||||
|
// Un-coalesce responses back to individual lanes. Connect uncoalesced
|
||||||
|
// results back into each lane's response queue.
|
||||||
|
val tableRow = tablePipeReg.io.deq
|
||||||
|
(io.respQueueIO zip tableRow.bits.lanes).zipWithIndex.foreach { case ((enqIOs, lane), laneNum) =>
|
||||||
|
lane.reqs.zipWithIndex.foreach { case (req, depth) =>
|
||||||
val enqIO = enqIOs(depth)
|
val enqIO = enqIOs(depth)
|
||||||
// spatial-only coalescing: only looking at 0th srcId entry
|
|
||||||
enqIO.valid := false.B
|
enqIO.valid := false.B
|
||||||
enqIO.bits := DontCare
|
enqIO.bits := DontCare
|
||||||
// debug
|
// debug
|
||||||
// when (resp.valid) {
|
// when (resp.valid) {
|
||||||
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
// printf(s"${i}-th uncoalesced response came back from lane ${laneNum}\n")
|
||||||
// }
|
// }
|
||||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||||
|
|
||||||
when(io.inflightLookup.valid && foundReq.valid) {
|
when(tableRow.valid && req.valid) {
|
||||||
enqIO.valid := io.coalResp.fire && foundReq.valid
|
enqIO.valid := tableRow.fire && req.valid
|
||||||
enqIO.bits.op := foundReq.op
|
enqIO.bits.op := req.op
|
||||||
enqIO.bits.source := foundReq.source
|
enqIO.bits.source := req.source
|
||||||
val logSize = foundRow.sizeEnumT.enumToLogSize(foundReq.sizeEnum)
|
val logSize = tableRow.bits.sizeEnumT.enumToLogSize(req.sizeEnum)
|
||||||
enqIO.bits.size := logSize
|
enqIO.bits.size := logSize
|
||||||
enqIO.bits.data :=
|
enqIO.bits.data :=
|
||||||
getCoalescedDataChunk(
|
getCoalescedDataChunk(
|
||||||
io.coalResp.bits.data,
|
coalRespPipeReg.io.deq.bits.data,
|
||||||
io.coalResp.bits.data.getWidth,
|
coalRespPipeReg.io.deq.bits.data.getWidth,
|
||||||
foundReq.offset,
|
req.offset,
|
||||||
logSize
|
logSize
|
||||||
)
|
)
|
||||||
// is this necessary?
|
// is this necessary?
|
||||||
@@ -1219,16 +1236,9 @@ class InFlightTable(
|
|||||||
config.maxCoalLogSize, // FIXME: offsetBits?
|
config.maxCoalLogSize, // FIXME: offsetBits?
|
||||||
config.sizeEnum
|
config.sizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
val entries = config.numNewSrcIds
|
val entries = config.numNewSrcIds
|
||||||
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
||||||
|
|
||||||
println(s"CoalescingUnit InFlightTable config: {")
|
|
||||||
println(s" sourceWidth: ${sourceWidth}")
|
|
||||||
println(s" offsetBits: ${offsetBits}")
|
|
||||||
println(s" sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
|
||||||
println(s"}")
|
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
// Enqueue IO
|
// Enqueue IO
|
||||||
//
|
//
|
||||||
@@ -1258,6 +1268,12 @@ class InFlightTable(
|
|||||||
val lookupSourceId = Input(UInt(sourceWidth.W))
|
val lookupSourceId = Input(UInt(sourceWidth.W))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
println(s"CoalescingUnit InFlightTable config: {")
|
||||||
|
println(s" sourceWidth: ${sourceWidth}")
|
||||||
|
println(s" offsetBits: ${offsetBits}")
|
||||||
|
println(s" sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
||||||
|
println(s"}")
|
||||||
|
|
||||||
val table = Mem(
|
val table = Mem(
|
||||||
entries,
|
entries,
|
||||||
new Bundle {
|
new Bundle {
|
||||||
@@ -1339,12 +1355,14 @@ class InFlightTable(
|
|||||||
// Lookup logic
|
// Lookup logic
|
||||||
io.lookup.valid := table(io.lookupSourceId).valid
|
io.lookup.valid := table(io.lookupSourceId).valid
|
||||||
io.lookup.bits := table(io.lookupSourceId).bits
|
io.lookup.bits := table(io.lookupSourceId).bits
|
||||||
// Dequeue as soon as lookup succeeds
|
// Every lookup to the table should succeed, because the request must have
|
||||||
when(io.lookup.fire) {
|
// been recorded before its response arrives.
|
||||||
// every lookup to the table should succeed as the request should have
|
when(io.lookup.ready) {
|
||||||
// gotten recorded earlier than the response
|
|
||||||
assert(table(io.lookupSourceId).valid === true.B,
|
assert(table(io.lookupSourceId).valid === true.B,
|
||||||
"table lookup with a valid sourceId failed")
|
"table lookup with a valid sourceId failed")
|
||||||
|
}
|
||||||
|
// Dequeue as soon as lookup succeeds
|
||||||
|
when(io.lookup.fire) {
|
||||||
table(io.lookupSourceId).valid := false.B
|
table(io.lookupSourceId).valid := false.B
|
||||||
}
|
}
|
||||||
assert(
|
assert(
|
||||||
|
|||||||
Reference in New Issue
Block a user