Separate sourcegen for coalesced req for clearer dataflow

Fixes 'same lookup and enq source ID' error in uncoalescer.
This commit is contained in:
Hansung Kim
2023-05-14 16:09:09 -07:00
parent e02e4ca500
commit d8e9dab329

View File

@@ -56,7 +56,7 @@ case class CoalescerConfig(
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
addressWidth: Int, // assume <= 32 addressWidth: Int, // assume <= 32
dataBusWidth: Int, // memory-side downstream TileLink data bus size dataBusWidth: Int, // memory-side downstream TileLink data bus size
// this has to be at least larger than the word size for // this has to be at least larger than word size for
// the coalescer to perform well // the coalescer to perform well
// watermark = 2, // minimum buffer occupancy to start coalescing // watermark = 2, // minimum buffer occupancy to start coalescing
wordSizeInBytes: Int, // 32-bit system wordSizeInBytes: Int, // 32-bit system
@@ -87,14 +87,14 @@ object defaultConfig extends CoalescerConfig(
numLanes = 4, numLanes = 4,
queueDepth = 1, queueDepth = 1,
waitTimeout = 8, waitTimeout = 8,
addressWidth = 32, addressWidth = 24,
dataBusWidth = 3, // 2^3=8 bytes, 64 bit bus dataBusWidth = 3, // 2^3=8 bytes, 64 bit bus
// watermark = 2, // watermark = 2,
wordSizeInBytes = 4, wordSizeInBytes = 4,
// when attaching to SoC, 16 source IDs are not enough due to longer latency // when attaching to SoC, 16 source IDs are not enough due to longer latency
numOldSrcIds = 16, numOldSrcIds = 16,
numNewSrcIds = 8, numNewSrcIds = 8,
respQueueDepth = 4, respQueueDepth = 8,
coalLogSizes = Seq(3), coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum, sizeEnum = DefaultInFlightTableSizeEnum,
numCoalReqs = 1, numCoalReqs = 1,
@@ -694,6 +694,31 @@ class MultiCoalescer(
if (!config.enable) disable if (!config.enable) disable
} }
class CoalescerSourceGen(
config: CoalescerConfig,
coalReqT: CoalescedRequest,
respT: TLBundleD
) extends Module {
val io = IO(new Bundle {
val inReq = Flipped(Decoupled(coalReqT.cloneType))
val outReq = Decoupled(coalReqT.cloneType)
val inResp = Flipped(Decoupled(respT.cloneType))
})
val sourceGen = Module(
new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds), ignoreInUse = false)
)
sourceGen.io.gen := io.inReq.fire // use up a source ID only when request is created
sourceGen.io.reclaim.valid := io.inResp.valid
sourceGen.io.reclaim.bits := io.inResp.bits.source
io.inResp.ready := true.B // should be always ready to reclaim old ID
// TODO: make sourceGen.io.reclaim Decoupled?
io.outReq <> io.inReq
// overwrite bits affected by sourcegen backpressure
io.outReq.valid := io.inReq.valid && sourceGen.io.id.valid
io.outReq.bits.source := sourceGen.io.id.bits
}
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
extends LazyModuleImp(outer) { extends LazyModuleImp(outer) {
require( require(
@@ -793,22 +818,34 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
val (tlCoal, edgeCoal) = outer.coalescerNode.out.head val (tlCoal, edgeCoal) = outer.coalescerNode.out.head
val sourceGen = Module( // The request coming out of MultiCoalescer still needs to go through source
new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds), ignoreInUse = false) // ID generation.
) // We pull the sourcegen part out of MultiCoalescer to a separate Module to
sourceGen.io.gen := coalescer.io.coalReq.fire // use up a source ID only when request is created // reduce IO bloat in the coalescer and top-level clutter.
sourceGen.io.reclaim.valid := tlCoal.d.valid //
sourceGen.io.reclaim.bits := tlCoal.d.bits.source // The overall flow looks like:
// ┌────────────────┐ ┌─────────────────────┐ ┌────────────────────┐
// │ CoalShiftQueue ├─┤ Mono/MultiCoalescer ├─┤ CoalescerSourceGen ├── TileLink req
// └────────────────┘ └─────────────────────┘ └────────────────────┘
// ^
// ┌────────────┐ ┌─────────────┐ │
// │ RespQueues ├─┤ Uncoalescer ├──┴────── TileLink resp
// └────────────┘ └─────────────┘
//
val coalSourceGen = Module(new CoalescerSourceGen(config, coalReqT, tlCoal.d.bits))
coalSourceGen.io.inReq <> coalescer.io.coalReq
coalSourceGen.io.inResp <> tlCoal.d
// This is the final coalesced request.
val coalReq = coalSourceGen.io.outReq
dontTouch(coalReq)
val coalReqValid = coalescer.io.coalReq.valid && sourceGen.io.id.valid tlCoal.a.valid := coalReq.valid
tlCoal.a.valid := coalReqValid tlCoal.a.bits := coalReq.bits.toTLA(edgeCoal)
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
tlCoal.a.bits.source := sourceGen.io.id.bits
coalescer.io.coalReq.ready := tlCoal.a.ready coalescer.io.coalReq.ready := tlCoal.a.ready
tlCoal.b.ready := true.B tlCoal.b.ready := true.B
tlCoal.c.valid := false.B tlCoal.c.valid := false.B
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. // tlCoal.d.ready should be connected to uncoalescer's ready, done below.
tlCoal.e.valid := false.B tlCoal.e.valid := false.B
require( require(
@@ -816,7 +853,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
s"tlCoal param `sourceBits` (${tlCoal.params.sourceBits}) mismatches coalescer constant" s"tlCoal param `sourceBits` (${tlCoal.params.sourceBits}) mismatches coalescer constant"
+ s" (${log2Ceil(config.numNewSrcIds)})" + s" (${log2Ceil(config.numNewSrcIds)})"
) )
require( require(
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
@@ -913,7 +949,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
// connect coalesced request that is newly generated and being recorded in // connect coalesced request that is newly generated and being recorded in
// the uncoalescer // the uncoalescer
uncoalescer.io.coalReq <> coalescer.io.coalReq uncoalescer.io.coalReq <> coalReq
// We can't simply use coalescer.io.coalReq.valid here. // We can't simply use coalescer.io.coalReq.valid here.
// coalescer.io.coalReq.valid tells us when there exists a valid coalescing // coalescer.io.coalReq.valid tells us when there exists a valid coalescing
// combination, but not when we can actually fire that to downstream, because // combination, but not when we can actually fire that to downstream, because
@@ -923,9 +959,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
// NOTE(hansung): this feels slightly awkward. Maybe doing sourcegen inside // NOTE(hansung): this feels slightly awkward. Maybe doing sourcegen inside
// the coalescer so that it gives the final call is better, but that may be // the coalescer so that it gives the final call is better, but that may be
// too much IO for the coalescer. // too much IO for the coalescer.
uncoalescer.io.coalReq.valid := coalReqValid
uncoalescer.io.invalidate := coalescer.io.invalidate uncoalescer.io.invalidate := coalescer.io.invalidate
val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits)
uncoalescer.io.windowElts := reqQueues.io.elts uncoalescer.io.windowElts := reqQueues.io.elts
// connect coalesced response going into the uncoalescer, ready to be // connect coalesced response going into the uncoalescer, ready to be
// uncoalesced // uncoalesced
@@ -1842,7 +1876,7 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule {
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM( new TLRAM(
address = AddressSet(0x0000, 0xffffffff), address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth) beatBytes = (1 << config.dataBusWidth)
) )
) )
@@ -1886,7 +1920,7 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters)
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM( new TLRAM(
address = AddressSet(0x0000, 0xffffffff), address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth) beatBytes = (1 << config.dataBusWidth)
) )
) )
@@ -1940,7 +1974,7 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM( new TLRAM(
address = AddressSet(0x0000, 0xffffffff), address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << defaultConfig.dataBusWidth) beatBytes = (1 << defaultConfig.dataBusWidth)
) )
) )