From 5fed3ef823d0a8cd04005fd0d098ba313760ed8a Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 15:42:23 -0700 Subject: [PATCH 01/10] Generalize Req/RespQueueEntry into Response/Request bundle --- src/main/scala/tilelink/Coalescing.scala | 76 ++++++++++-------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index d63e9cf..7835210 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -135,7 +135,10 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La lazy val module = new CoalescingUnitImp(this, config) } -class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle { +// Protocol-agnostic bundles that represent a request and a response to the +// coalescer. + +class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle { require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") val op = UInt(1.W) // 0=READ 1=WRITE val address = UInt(addressWidth.W) @@ -163,7 +166,7 @@ class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWid } } -class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { +class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { val op = UInt(1.W) // 0=READ 1=WRITE val size = UInt(sizeWidth.W) val source = UInt(sourceWidth.W) @@ -192,6 +195,15 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends B } } +class NonCoalescedResponse(config: CoalescerConfig) +extends Response(sourceWidth = log2Ceil(config.numOldSrcIds), + sizeWidth = config.wordSizeWidth, + dataWidth = config.wordSizeInBytes * 8) +class CoalescedResponse(config: CoalescerConfig) +extends Response(sourceWidth = log2Ceil(config.numNewSrcIds), + sizeWidth = 
log2Ceil(config.maxCoalLogSize), + dataWidth = (8 * (1 << config.maxCoalLogSize))) + // If `ignoreInUse`, just keep giving out new IDs without checking if it is in // use. class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module { @@ -340,7 +352,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } // Software model: coalescer.py -class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], +class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[Request], config: CoalescerConfig) extends Module { val io = IO(new Bundle { val window = Input(windowT.io.cloneType) @@ -376,7 +388,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], val size = coalLogSize val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U - def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = { + def canMatch(req0: Request, req0v: Bool, req1: Request, req1v: Bool): Bool = { (req0.op === req1.op) && (req0v && req1v) && ((req0.address & this.addrMask) === (req1.address & this.addrMask)) @@ -471,7 +483,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], // coalesced request out of all possible combinations. // // Software model: coalescer.py -class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry, +class MultiCoalescer(windowT: CoalShiftQueue[Request], coalReqT: Request, config: CoalescerConfig) extends Module { val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues @@ -612,11 +624,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends val sourceWidth = outer.cpuNode.in.head._1.params.sourceBits // note we are using word size. 
assuming all coalescer inputs are word sized - val reqQueueEntryT = new ReqQueueEntry(sourceWidth, config.wordSizeWidth, + val reqQueueEntryT = new Request(sourceWidth, config.wordSizeWidth, config.addressWidth, (config.wordSizeInBytes * 8)) val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)) - val coalReqT = new ReqQueueEntry(log2Ceil(config.numNewSrcIds), log2Ceil(config.maxCoalLogSize), + val coalReqT = new Request(log2Ceil(config.numNewSrcIds), log2Ceil(config.maxCoalLogSize), config.addressWidth, (1 << config.maxCoalLogSize) * 8) val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config)) coalescer.io.window := reqQueues.io @@ -703,7 +715,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // coalesced request. Upper bound is min(DEPTH, 2**sourceWidth). val numPerLaneReqs = config.queueDepth - val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), + // FIXME: no need to contain maxCoalLogSize data + val respQueueEntryT = new Response(sourceWidth, log2Ceil(config.maxCoalLogSize), (1 << config.maxCoalLogSize) * 8) val respQueues = Seq.tabulate(config.numLanes) { _ => Module( @@ -821,8 +834,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends uncoalescer.io.newEntry := newEntry // Cleanup: custom <>? uncoalescer.io.coalResp.valid := tlCoal.d.valid - uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source - uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data + uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits) tlCoal.d.ready := uncoalescer.io.coalResp.ready // Connect uncoalescer results back into each lane's response queue @@ -853,24 +865,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends dontTouch(tlCoal.d) } -// Protocol-agnostic bundle that represents a coalesced response. 
-// -// Having this makes it easier to: -// * do unit tests -- no need to deal with TileLink in the chiseltest code -// * adapt coalescer to custom protocols like a custom L1 cache interface. -// -// FIXME: overlaps with RespQueueEntry. Trait-ify -class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle { - val source = UInt(log2Ceil(config.numNewSrcIds).W) - val data = UInt((8 * (1 << config.maxCoalLogSize)).W) - - def fromTLD(bundle:TLBundleD): Unit = { - this.source := bundle.source - this.data := bundle.data - } - -} - class Uncoalescer(config: CoalescerConfig) extends Module { // notes to hansung: // val numLanes: Int, <-> config.NUM_LANES @@ -884,15 +878,14 @@ class Uncoalescer(config: CoalescerConfig) extends Module { val coalReqValid = Input(Bool()) // FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer val newEntry = Input(inflightTable.entryT.cloneType) - val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config))) + val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val uncoalResps = Output( Vec( config.numLanes, Vec( config.queueDepth, ValidIO( - new RespQueueEntry(log2Ceil(config.numOldSrcIds), config.wordSizeWidth, - config.wordSizeInBytes * 8) + new NonCoalescedResponse(config) ) ) ) @@ -1853,25 +1846,20 @@ class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends La val node = TLIdentityNode() node :=* outputXbar.node - val nonCoalEntryT = new ReqQueueEntry( + val nonCoalEntryT = new Request( log2Ceil(config.numOldSrcIds), config.wordSizeWidth, config.addressWidth, config.wordSizeInBytes * 8 ) - val coalEntryT = new ReqQueueEntry( + val coalEntryT = new Request( log2Ceil(config.numOldSrcIds), log2Ceil(config.maxCoalLogSize), config.addressWidth, (1 << config.maxCoalLogSize) * 8 ) - val respNonCoalEntryT = new RespQueueEntry( - log2Ceil(config.numOldSrcIds), - config.wordSizeWidth, - config.wordSizeInBytes * 8 - ) - - val respCoalBundleT = new CoalescedResponseBundle(config) 
+ val respNonCoalEntryT = new NonCoalescedResponse(config) + val respCoalBundleT = new CoalescedResponse(config) lazy val module = new CoalescerXbarImpl( @@ -1883,10 +1871,10 @@ class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends La class CoalescerXbarImpl(outer: CoalescerXbar, config: CoalescerConfig, - nonCoalEntryT: ReqQueueEntry, - coalEntryT: ReqQueueEntry, - respNonCoalEntryT: RespQueueEntry, - respCoalBundleT: CoalescedResponseBundle + nonCoalEntryT: Request, + coalEntryT: Request, + respNonCoalEntryT: Response, + respCoalBundleT: CoalescedResponse ) extends LazyModuleImp(outer){ From 406f90b6332dde2351f95386692410134a072064 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 15:50:58 -0700 Subject: [PATCH 02/10] De-duplicate equivalent Request bundles using NonCoal/Coal variants --- src/main/scala/tilelink/Coalescing.scala | 40 +++++++++---------- .../scala/coalescing/CoalescingUnitTest.scala | 6 +-- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 7835210..0345c8b 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -165,6 +165,16 @@ class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: In bits } } +class NonCoalescedRequest(config: CoalescerConfig) +extends Request(sourceWidth = log2Ceil(config.numOldSrcIds), + sizeWidth = config.wordSizeWidth, + addressWidth = config.addressWidth, + dataWidth = config.wordSizeInBytes * 8) +class CoalescedRequest(config: CoalescerConfig) +extends Request(sourceWidth = log2Ceil(config.numNewSrcIds), + sizeWidth = log2Ceil(config.maxCoalLogSize), + addressWidth = config.addressWidth, + dataWidth = (8 * (1 << config.maxCoalLogSize))) class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { val op = UInt(1.W) // 0=READ 1=WRITE @@ -194,7 +204,6 @@ class Response(sourceWidth: Int, 
sizeWidth: Int, dataWidth: Int) extends Bundle this.error := bundle.denied } } - class NonCoalescedResponse(config: CoalescerConfig) extends Response(sourceWidth = log2Ceil(config.numOldSrcIds), sizeWidth = config.wordSizeWidth, @@ -352,7 +361,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } // Software model: coalescer.py -class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[Request], +class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedRequest], config: CoalescerConfig) extends Module { val io = IO(new Bundle { val window = Input(windowT.io.cloneType) @@ -483,7 +492,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[Request], // coalesced request out of all possible combinations. // // Software model: coalescer.py -class MultiCoalescer(windowT: CoalShiftQueue[Request], coalReqT: Request, +class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, config: CoalescerConfig) extends Module { val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues @@ -622,14 +631,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + s"mismatch with config.addressWidth (${config.addressWidth})") - val sourceWidth = outer.cpuNode.in.head._1.params.sourceBits + val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits // note we are using word size. 
assuming all coalescer inputs are word sized - val reqQueueEntryT = new Request(sourceWidth, config.wordSizeWidth, - config.addressWidth, (config.wordSizeInBytes * 8)) + val reqQueueEntryT = new NonCoalescedRequest(config) val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)) - val coalReqT = new Request(log2Ceil(config.numNewSrcIds), log2Ceil(config.maxCoalLogSize), - config.addressWidth, (1 << config.maxCoalLogSize) * 8) + val coalReqT = new CoalescedRequest(config) val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config)) coalescer.io.window := reqQueues.io reqQueues.io.coalescable := coalescer.io.coalescable @@ -716,7 +723,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends val numPerLaneReqs = config.queueDepth // FIXME: no need to contain maxCoalLogSize data - val respQueueEntryT = new Response(sourceWidth, log2Ceil(config.maxCoalLogSize), + val respQueueEntryT = new Response(oldSourceWidth, log2Ceil(config.maxCoalLogSize), (1 << config.maxCoalLogSize) * 8) val respQueues = Seq.tabulate(config.numLanes) { _ => Module( @@ -1846,21 +1853,10 @@ class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends La val node = TLIdentityNode() node :=* outputXbar.node - val nonCoalEntryT = new Request( - log2Ceil(config.numOldSrcIds), - config.wordSizeWidth, - config.addressWidth, - config.wordSizeInBytes * 8 - ) - val coalEntryT = new Request( - log2Ceil(config.numOldSrcIds), - log2Ceil(config.maxCoalLogSize), - config.addressWidth, - (1 << config.maxCoalLogSize) * 8 - ) + val nonCoalEntryT = new NonCoalescedRequest(config) + val coalEntryT = new CoalescedRequest(config) val respNonCoalEntryT = new NonCoalescedResponse(config) val respCoalBundleT = new CoalescedResponse(config) - lazy val module = new CoalescerXbarImpl( this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala 
b/src/test/scala/coalescing/CoalescingUnitTest.scala index 1a3ceee..29ea8dc 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -127,12 +127,12 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI ) val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) - val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(x.cloneType)) + val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[Request]].map(x => IO(x.cloneType)) val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) - val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType))) + val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[Request]].map(x => IO(Output(x.cloneType))) val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType))) val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType)) - val coalReqBits = IO(Output(peekIn(6).asInstanceOf[ReqQueueEntry].cloneType)) + val coalReqBits = IO(Output(peekIn(6).asInstanceOf[Request].cloneType)) val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType)) val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType)) From 7fa6be4a8bc12ada8ee03fe7a4fe025c8709f860 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 15:56:30 -0700 Subject: [PATCH 03/10] Use case class for noncoal/coal bundles don't know what they really do, but they look fancy --- src/main/scala/tilelink/Coalescing.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 0345c8b..373337a 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -165,12 +165,12 @@ class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: In bits } } 
-class NonCoalescedRequest(config: CoalescerConfig) +case class NonCoalescedRequest(config: CoalescerConfig) extends Request(sourceWidth = log2Ceil(config.numOldSrcIds), sizeWidth = config.wordSizeWidth, addressWidth = config.addressWidth, dataWidth = config.wordSizeInBytes * 8) -class CoalescedRequest(config: CoalescerConfig) +case class CoalescedRequest(config: CoalescerConfig) extends Request(sourceWidth = log2Ceil(config.numNewSrcIds), sizeWidth = log2Ceil(config.maxCoalLogSize), addressWidth = config.addressWidth, @@ -204,11 +204,11 @@ class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle this.error := bundle.denied } } -class NonCoalescedResponse(config: CoalescerConfig) +case class NonCoalescedResponse(config: CoalescerConfig) extends Response(sourceWidth = log2Ceil(config.numOldSrcIds), sizeWidth = config.wordSizeWidth, dataWidth = config.wordSizeInBytes * 8) -class CoalescedResponse(config: CoalescerConfig) +case class CoalescedResponse(config: CoalescerConfig) extends Response(sourceWidth = log2Ceil(config.numNewSrcIds), sizeWidth = log2Ceil(config.maxCoalLogSize), dataWidth = (8 * (1 << config.maxCoalLogSize))) From 0c8909cb43d3d48a9a2f59736e943831e914982b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 16:11:39 -0700 Subject: [PATCH 04/10] scalafmt --- src/main/scala/tilelink/Coalescing.scala | 480 ++++++++++++++--------- 1 file changed, 302 insertions(+), 178 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 373337a..0e72dae 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -152,7 +152,7 @@ class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: In fromSource = this.source, toAddress = this.address, lgSize = this.size, - data = this.data, + data = this.data ) val (glegal, gbits) = edgeOut.Get( fromSource = this.source, @@ -166,17 +166,22 @@ class Request(sourceWidth: 
Int, sizeWidth: Int, addressWidth: Int, dataWidth: In } } case class NonCoalescedRequest(config: CoalescerConfig) -extends Request(sourceWidth = log2Ceil(config.numOldSrcIds), - sizeWidth = config.wordSizeWidth, - addressWidth = config.addressWidth, - dataWidth = config.wordSizeInBytes * 8) + extends Request( + sourceWidth = log2Ceil(config.numOldSrcIds), + sizeWidth = config.wordSizeWidth, + addressWidth = config.addressWidth, + dataWidth = config.wordSizeInBytes * 8 + ) case class CoalescedRequest(config: CoalescerConfig) -extends Request(sourceWidth = log2Ceil(config.numNewSrcIds), - sizeWidth = log2Ceil(config.maxCoalLogSize), - addressWidth = config.addressWidth, - dataWidth = (8 * (1 << config.maxCoalLogSize))) + extends Request( + sourceWidth = log2Ceil(config.numNewSrcIds), + sizeWidth = log2Ceil(config.maxCoalLogSize), + addressWidth = config.addressWidth, + dataWidth = (8 * (1 << config.maxCoalLogSize)) + ) -class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { +class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) + extends Bundle { val op = UInt(1.W) // 0=READ 1=WRITE val size = UInt(sizeWidth.W) val source = UInt(sourceWidth.W) @@ -205,17 +210,22 @@ class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle } } case class NonCoalescedResponse(config: CoalescerConfig) -extends Response(sourceWidth = log2Ceil(config.numOldSrcIds), - sizeWidth = config.wordSizeWidth, - dataWidth = config.wordSizeInBytes * 8) + extends Response( + sourceWidth = log2Ceil(config.numOldSrcIds), + sizeWidth = config.wordSizeWidth, + dataWidth = config.wordSizeInBytes * 8 + ) case class CoalescedResponse(config: CoalescerConfig) -extends Response(sourceWidth = log2Ceil(config.numNewSrcIds), - sizeWidth = log2Ceil(config.maxCoalLogSize), - dataWidth = (8 * (1 << config.maxCoalLogSize))) + extends Response( + sourceWidth = log2Ceil(config.numNewSrcIds), + sizeWidth = log2Ceil(config.maxCoalLogSize), + dataWidth = (8 * (1 
<< config.maxCoalLogSize)) + ) // If `ignoreInUse`, just keep giving out new IDs without checking if it is in // use. -class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module { +class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) + extends Module { val io = IO(new Bundle { val gen = Input(Bool()) val reclaim = Input(Valid(UInt(sourceWidth.W))) @@ -234,15 +244,16 @@ class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) e io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid) io.id.bits := head - when (io.gen && io.id.valid /* fire */) { + when(io.gen && io.id.valid /* fire */ ) { occupancyTable(io.id.bits).valid := true.B // mark in use } - when (io.reclaim.valid) { + when(io.reclaim.valid) { occupancyTable(io.reclaim.bits).valid := false.B // mark freed } } -class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module { +class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) + extends Module { val io = IO(new Bundle { val queue = new Bundle { val enq = Vec(config.numLanes, DeqIO(gen.cloneType)) @@ -259,7 +270,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e // eltPrototype.valid := false.B val elts = Reg(Vec(config.numLanes, Vec(entries, Valid(gen)))) - val writePtr = RegInit(VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W)))) + val writePtr = RegInit( + VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W))) + ) val deqDone = RegInit(VecInit(Seq.fill(config.numLanes)(false.B))) private def resetElts = { @@ -270,7 +283,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } } } - when (reset.asBool) { + when(reset.asBool) { resetElts } @@ -286,14 +299,17 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e // current cycle. 
// // shift hint is when the heads have no more coalescable left this or next cycle - val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, inv) => - c && !(io.invalidate.valid && inv) - }.reduce(_ || _) + val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))) + .map { case (c, inv) => + c && !(io.invalidate.valid && inv) + } + .reduce(_ || _) val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _) // valid && !fire means we enable enqueueing to a full queue, provided the // arbiter is taking away all remaining valid queue heads in the next cycle so // that we make space for the entire next warp. - val syncedDeqValidNextCycle = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) + val syncedDeqValidNextCycle = + io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) for (i <- 0 until config.numLanes) { val enq = io.queue.enq(i) @@ -313,20 +329,22 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e // can take new entries if not empty, or if full but shifting enq.ready := (!ctrl.full) || ctrl.shift - when (ctrl.shift) { + when(ctrl.shift) { // shift, invalidate tail, invalidate coalesced requests elts(i).zipWithIndex.foreach { case (elt, j) => if (j == entries - 1) { // tail elt.valid := false.B } else { elt.bits := elts(i)(j + 1).bits - elt.valid := elts(i)(j + 1).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) + elt.valid := elts(i)( + j + 1 + ).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) } } // reset dequeue mask when new entries are shifted in deqDone(i) := false.B // enqueue - when (enq.ready && syncedEnqValid) { // to allow drift, swap for enq.fire + when(enq.ready && syncedEnqValid) { // to allow drift, swap for enq.fire elts(i)(writePtr(i) - 1.U).bits := enq.bits elts(i)(writePtr(i) - 1.U).valid := enq.valid }.otherwise { @@ -334,13 +352,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } }.otherwise 
{ // invalidate coalesced requests - when (io.invalidate.valid) { + when(io.invalidate.valid) { (elts(i) zip io.invalidate.bits(i).asBools).map { case (elt, inv) => elt.valid := elt.valid && !inv } } // enqueue - when (enq.ready && syncedEnqValid) { + when(enq.ready && syncedEnqValid) { elts(i)(writePtr(i)).bits := enq.bits elts(i)(writePtr(i)).valid := enq.valid writePtr(i) := writePtr(i) + 1.U @@ -352,8 +370,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e // When doing spatial-only coalescing, queues should never drift from each // other, i.e. the queue heads should always contain mem requests from the // same instruction. - val queueInSync = controlSignals.map(_ === controlSignals.head).reduce(_ && _) && - writePtr.map(_ === writePtr.head).reduce(_ && _) + val queueInSync = + controlSignals.map(_ === controlSignals.head).reduce(_ && _) && + writePtr.map(_ === writePtr.head).reduce(_ && _) assert(queueInSync, "shift queue lanes are not in sync") io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt) @@ -361,8 +380,11 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e } // Software model: coalescer.py -class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedRequest], - config: CoalescerConfig) extends Module { +class MonoCoalescer( + coalLogSize: Int, + windowT: CoalShiftQueue[NonCoalescedRequest], + config: CoalescerConfig +) extends Module { val io = IO(new Bundle { val window = Input(windowT.io.cloneType) val results = Output(new Bundle { @@ -371,8 +393,10 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W))) // number of entries matched with this leader lane's head. 
// maximum is numLanes * queueDepth - val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) - val coverageHits = Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) + val matchCount = + Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) + val coverageHits = + Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) val canCoalesce = Output(Vec(config.numLanes, Bool())) }) }) @@ -386,9 +410,13 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques val leadersValid = io.window.mask.map(_.asBools.head) def printQueueHeads = { - leaders.zipWithIndex.foreach{ case (head, i) => - printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", - leadersValid(i), head.source, head.address) + leaders.zipWithIndex.foreach { case (head, i) => + printf( + s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", + leadersValid(i), + head.source, + head.address + ) } } // when (leadersValid.reduce(_ || _)) { @@ -406,34 +434,42 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques // Gives a 2-D table of Bools representing match at every queue entry, // for each lane (so 3-D in total). 
// dimensions: (leader lane, follower lane, follower entry) - val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) => - (io.window.elts zip io.window.mask).map { case (followers, followerValids) => - // compare leader's head against follower's every queue entry - (followers zip followerValids.asBools).map { case (follower, followerValid) => - canMatch(follower, followerValid, leader, leaderValid) - // FIXME: disabling halving optimization because it does not give the - // correct per-lane coalescable indication to the shift queue - // // match leader to only followers at lanes >= leader idx - // // this halves the number of comparators - // if (followerIndex < leaderIndex) false.B - // else canMatch(follower, followerValid, leader, leaderValid) + val matchTablePerLane = (leaders zip leadersValid).map { + case (leader, leaderValid) => + (io.window.elts zip io.window.mask).map { + case (followers, followerValids) => + // compare leader's head against follower's every queue entry + (followers zip followerValids.asBools).map { + case (follower, followerValid) => + canMatch(follower, followerValid, leader, leaderValid) + // FIXME: disabling halving optimization because it does not give the + // correct per-lane coalescable indication to the shift queue + // // match leader to only followers at lanes >= leader idx + // // this halves the number of comparators + // if (followerIndex < leaderIndex) false.B + // else canMatch(follower, followerValid, leader, leaderValid) + } } - } } val matchCounts = matchTablePerLane.map(table => - table.map(PopCount(_)) // sum up each column - .reduce(_ +& _)) + table + .map(PopCount(_)) // sum up each column + .reduce(_ +& _) + ) val canCoalesce = matchCounts.map(_ > 1.U) // Elect the leader that has the most match counts. 
// TODO: potentially expensive: magnitude comparator def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = { - matchCounts.zipWithIndex.map { - case (c, i) => (c, i.U) - }.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => + matchCounts.zipWithIndex + .map { case (c, i) => + (c, i.U) + } + .reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => (Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j)) - }._2 + } + ._2 } // Elect leader by choosing the smallest-index lane that has a valid // match, i.e. using priority encoder. @@ -444,7 +480,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux // matchTable for the chosen lane, but converted to a Vec[UInt] - val chosenMatches = VecInit(matchTablePerLane.map{ table => + val chosenMatches = VecInit(matchTablePerLane.map { table => VecInit(table.map(VecInit(_).asUInt)) })(chosenLeaderIdx) val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx) @@ -452,18 +488,21 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques // coverage calculation def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth) // 2-D table flattened to 1-D - val offsets = io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) + val offsets = + io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) val valids = chosenMatches.flatMap(_.asBools) // indicates for each word in the coalesced chunk whether it is accessed by // any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four // words in the coalesced data coming back will be accessed by some request // and we've reached 100% bandwidth utilization. 
val hits = Seq.tabulate(1 << (size - config.wordSizeWidth)) { target => - (offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _) + (offsets zip valids) + .map { case (offset, valid) => valid && (offset === target.U) } + .reduce(_ || _) } // debug prints - when (leadersValid.reduce(_ || _)) { + when(leadersValid.reduce(_ || _)) { matchCounts.zipWithIndex.foreach { case (count, i) => printf(s"lane[${i}] matchCount = %d\n", count); } @@ -492,20 +531,26 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques // coalesced request out of all possible combinations. // // Software model: coalescer.py -class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, - config: CoalescerConfig) extends Module { +class MultiCoalescer( + windowT: CoalShiftQueue[NonCoalescedRequest], + coalReqT: Request, + config: CoalescerConfig +) extends Module { val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues val window = Input(windowT.io.cloneType) // generated coalesced request val coalReq = DecoupledIO(coalReqT.cloneType) // invalidate signals going into each request queue's head - val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) + val invalidate = + Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) // whether a lane is coalescable val coalescable = Output(Vec(config.numLanes, Bool())) }) - val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) + val coalescers = config.coalLogSizes.map(size => + Module(new MonoCoalescer(size, windowT, config)) + ) coalescers.foreach(_.io.window := io.window) def normalize(valPerSize: Seq[UInt]): Seq[UInt] = { @@ -530,9 +575,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W)) val chosenValid = Wire(Bool()) // minimum 25% coverage 
- val minCoverage = 1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2)) + val minCoverage = + 1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2)) - when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { + when(normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { chosenSizeIdx := argMax(normalizedHits) chosenValid := true.B printf("coalescing success by coverage policy\n") @@ -562,9 +608,14 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req val flatMatches = chosenBundle.matchOH.flatMap(_.asBools) // check for word alignment in addresses - assert(io.window.elts.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)).zip( - io.window.mask.flatMap(_.asBools)).map { case (aligned, valid) => (!valid) || aligned }.reduce(_ || _), - "one or more addresses used for coalescing is not word-aligned") + assert( + io.window.elts + .flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)) + .zip(io.window.mask.flatMap(_.asBools)) + .map { case (aligned, valid) => (!valid) || aligned } + .reduce(_ || _), + "one or more addresses used for coalescing is not word-aligned" + ) // note: this is word-level coalescing. 
if finer granularity is needed, need to modify code val numWords = (1.U << (chosenSize - config.wordSizeWidth.U)).asUInt @@ -579,18 +630,29 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req val sel = flatReqs.zip(flatMatches).map { case (req, m) => // note: ANDing against addrMask is to conform to active byte lanes requirements // if aligning to LSB suffices, we should add the bitwise AND back - m && ((req.address(config.maxCoalLogSize - 1, config.wordSizeWidth)/* & addrMask*/) === i.U) + m && ((req.address( + config.maxCoalLogSize - 1, + config.wordSizeWidth + ) /* & addrMask*/ ) === i.U) } // TODO: SW uses priority encoder, not sure about behavior of MuxCase - data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) => - s -> req.data - }) - mask(i) := MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) => - s -> req.mask - }) + data(i) := MuxCase( + DontCare, + flatReqs.zip(sel).map { case (req, s) => + s -> req.data + } + ) + mask(i) := MuxCase( + 0.U, + flatReqs.zip(sel).map { case (req, s) => + s -> req.mask + } + ) } - val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))) + val sourceGen = Module( + new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds)) + ) sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created sourceGen.io.reclaim.valid := false.B // not used sourceGen.io.reclaim.bits := DontCare // not used @@ -608,7 +670,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req io.invalidate.bits := chosenBundle.matchOH io.invalidate.valid := io.coalReq.fire // invalidate only when fire - io.coalescable := coalescers.map(_.io.results.canCoalesce.asUInt).reduce(_ | _).asBools + io.coalescable := coalescers + .map(_.io.results.canCoalesce.asUInt) + .reduce(_ | _) + .asBools dontTouch(io.invalidate) // debug @@ -620,21 +685,30 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req if 
(!config.enable) disable } -class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) { - require(outer.cpuNode.in.length == config.numLanes, +class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) + extends LazyModuleImp(outer) { + require( + outer.cpuNode.in.length == config.numLanes, s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as " + - s"config.numLanes (${config.numLanes})") - require(outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds), + s"config.numLanes (${config.numLanes})" + ) + require( + outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds), s"TL param sourceBits (${outer.cpuNode.in.head._1.params.sourceBits}) " + - s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})") - require(outer.cpuNode.in.head._1.params.addressBits == config.addressWidth, + s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})" + ) + require( + outer.cpuNode.in.head._1.params.addressBits == config.addressWidth, s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + - s"mismatch with config.addressWidth (${config.addressWidth})") + s"mismatch with config.addressWidth (${config.addressWidth})" + ) val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits // note we are using word size. assuming all coalescer inputs are word sized val reqQueueEntryT = new NonCoalescedRequest(config) - val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)) + val reqQueues = Module( + new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config) + ) val coalReqT = new CoalescedRequest(config) val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config)) @@ -710,7 +784,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. 
tlCoal.e.valid := false.B - // =========================================================================== // Response flow // =========================================================================== @@ -723,8 +796,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends val numPerLaneReqs = config.queueDepth // FIXME: no need to contain maxCoalLogSize data - val respQueueEntryT = new Response(oldSourceWidth, log2Ceil(config.maxCoalLogSize), - (1 << config.maxCoalLogSize) * 8) + val respQueueEntryT = new Response( + oldSourceWidth, + log2Ceil(config.maxCoalLogSize), + (1 << config.maxCoalLogSize) * 8 + ) val respQueues = Seq.tabulate(config.numLanes) { _ => Module( new MultiPortQueue( @@ -810,12 +886,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends val newEntry = Wire(uncoalescer.inflightTable.entryT) newEntry.source := coalescer.io.coalReq.bits.source - assert (config.maxCoalLogSize <= config.dataBusWidth, - "multi-beat coalesced reads/writes are currently not supported") - assert ( + assert( + config.maxCoalLogSize <= config.dataBusWidth, + "multi-beat coalesced reads/writes are currently not supported" + ) + assert( tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" - + s" (${(1 << config.dataBusWidth) * 8})" + + s" (${(1 << config.dataBusWidth) * 8})" ) val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the @@ -825,8 +903,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends (laneEntry.reqs zip laneInv.asBools).zipWithIndex .foreach { case ((reqEntry, inv), i) => val req = reqQueues.io.elts(lane)(i) - when ((coalescer.io.invalidate.valid && inv)) { - printf(s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", req.source) + when((coalescer.io.invalidate.valid && inv)) { + printf( + 
s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", + req.source + ) } reqEntry.valid := (coalescer.io.invalidate.valid && inv) reqEntry.source := req.source @@ -845,22 +926,23 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends tlCoal.d.ready := uncoalescer.io.coalResp.ready // Connect uncoalescer results back into each lane's response queue - (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) => - perLaneResps.zipWithIndex.foreach { case (resp, i) => - // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream - // cache. This should ideally not happen though. - assert( - q.io.enq(respQueueUncoalPortOffset + i).ready, - s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}" - ) - q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid - q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits + (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { + case ((q, perLaneResps), lane) => + perLaneResps.zipWithIndex.foreach { case (resp, i) => + // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream + // cache. This should ideally not happen though. + assert( + q.io.enq(respQueueUncoalPortOffset + i).ready, + s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}" + ) + q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid + q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits // debug // when (resp.valid) { // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") // } // dontTouch(q.io.enq(respQueueCoalPortOffset)) - } + } } // Debug @@ -972,7 +1054,8 @@ class Uncoalescer(config: CoalescerConfig) extends Module { // split the coalesced response back to individual per-lane responses with the // right metadata. 
class InflightCoalReqTable(config: CoalescerConfig) extends Module { - val offsetBits = config.maxCoalLogSize - config.wordSizeWidth // assumes word offset + val offsetBits = + config.maxCoalLogSize - config.wordSizeWidth // assumes word offset val entryT = new InflightCoalReqTableEntry( config.numLanes, config.queueDepth, @@ -1019,7 +1102,7 @@ class InflightCoalReqTable(config: CoalescerConfig) extends Module { } val full = Wire(Bool()) - full := (0 until entries).map( table(_).valid ).reduce( _ && _ ) + full := (0 until entries).map(table(_).valid).reduce(_ && _) assert(!full, "inflight table is full and blocking coalescer") dontTouch(full) @@ -1094,8 +1177,12 @@ object TLUtils { // `traceHasSource` is true if the input trace file has an additional source // ID column. This is useful for using the output trace file genereated by // MemTraceLogger as the driver. -class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false) - (implicit p: Parameters) extends LazyModule { +class MemTraceDriver( + config: CoalescerConfig, + filename: String, + traceHasSource: Boolean = false +)(implicit p: Parameters) + extends LazyModule { // Create N client nodes together val laneNodes = Seq.tabulate(config.numLanes) { i => val clientParam = Seq( @@ -1113,7 +1200,8 @@ class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: val node = TLIdentityNode() laneNodes.foreach { l => node := l } - lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource) + lazy val module = + new MemTraceDriverImp(this, config, filename, traceHasSource) } trait HasTraceLine { @@ -1136,9 +1224,12 @@ class TraceLine extends Bundle with HasTraceLine { val data = UInt(64.W) } -class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String, - traceHasSource: Boolean) - extends LazyModuleImp(outer) +class MemTraceDriverImp( + outer: MemTraceDriver, + config: CoalescerConfig, + filename: String, + 
traceHasSource: Boolean +) extends LazyModuleImp(outer) with UnitTestModule { // Current cycle mark to read from trace val traceReadCycle = RegInit(1.U(64.W)) @@ -1176,7 +1267,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename // Not all fire because trace cycle has to advance even when there is no valid // line in the trace. - when (reqQueueAllReady){ + when(reqQueueAllReady) { traceReadCycle := traceReadCycle + 1.U } @@ -1216,11 +1307,16 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename sizeInBytes := (1.U) << req.size mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U) wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data) - val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) + val wordAlignedAddress = + req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedSize = Mux(subword, 2.U, req.size) - val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds), - ignoreInUse = false)) + val sourceGen = Module( + new RoundRobinSourceGenerator( + log2Ceil(config.numOldSrcIds), + ignoreInUse = false + ) + ) sourceGen.io.gen := reqQ.io.deq.fire // assert(sourceGen.io.id.valid) @@ -1229,7 +1325,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename toAddress = hashToValidPhyAddr(wordAlignedAddress), lgSize = wordAlignedSize, // trace line already holds log2(size) // data should be aligned to beatBytes - data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt + data = + (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt ) val (glegal, gbits) = edge.Get( fromSource = sourceGen.io.id.bits, @@ -1240,7 +1337,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename val bits = Mux(req.is_store, pbits, gbits) tlOut.a.valid := (reqQ.io.deq.valid && 
sourceGen.io.id.valid) - when (tlOut.a.valid) { + when(tlOut.a.valid) { assert(legal, "illegal TL req gen") } tlOut.a.bits := bits @@ -1288,9 +1385,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean) extends BlackBox( - Map("FILENAME" -> filename, - "NUM_LANES" -> numLanes, - "HAS_SOURCE" -> (if (traceHasSource) 1 else 0)) + Map( + "FILENAME" -> filename, + "NUM_LANES" -> numLanes, + "HAS_SOURCE" -> (if (traceHasSource) 1 else 0) + ) ) with HasBlackBoxResource { val traceLineT = new TraceLine @@ -1304,19 +1403,20 @@ class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean) // These names have to match declarations in the Verilog code, eg. // trace_read_address. - val trace_read = new Bundle { // can't use HasTraceLine because this doesn't have source - val ready = Input(Bool()) - val valid = Output(UInt(numLanes.W)) - // Chisel can't interface with Verilog 2D port, so flatten all lanes into - // single wide 1D array. - // TODO: assumes 64-bit address. - val cycle = Input(UInt(64.W)) - val address = Output(UInt((addrW * numLanes).W)) - val is_store = Output(UInt(numLanes.W)) - val size = Output(UInt((sizeW * numLanes).W)) - val data = Output(UInt((dataW * numLanes).W)) - val finished = Output(Bool()) - } + val trace_read = + new Bundle { // can't use HasTraceLine because this doesn't have source + val ready = Input(Bool()) + val valid = Output(UInt(numLanes.W)) + // Chisel can't interface with Verilog 2D port, so flatten all lanes into + // single wide 1D array. + // TODO: assumes 64-bit address. 
+ val cycle = Input(UInt(64.W)) + val address = Output(UInt((addrW * numLanes).W)) + val is_store = Output(UInt(numLanes.W)) + val size = Output(UInt((sizeW * numLanes).W)) + val data = Output(UInt((dataW * numLanes).W)) + val finished = Output(Bool()) + } }) addResource("/vsrc/SimMemTrace.v") @@ -1443,11 +1543,11 @@ class MemTraceLogger( // This assert only holds true for PutFullData and not PutPartialData, // where HIGH bits in the mask may not be contiguous. - when (tlIn.a.valid) { + when(tlIn.a.valid) { assert( PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), "mask HIGH popcount do not match the TL size. " + - "Partial masks are not allowed for PutFull" + "Partial masks are not allowed for PutFull" ) } val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) @@ -1476,17 +1576,25 @@ class MemTraceLogger( // stats val numReqsThisCycle = - laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } + laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { + (v0, v1) => v0 + v1 + } val numRespsThisCycle = - laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } + laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { + (v0, v1) => v0 + v1 + } val reqBytesThisCycle = - laneReqs.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => - b0 + b1 - } + laneReqs + .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) } + .reduce { (b0, b1) => + b0 + b1 + } val respBytesThisCycle = - laneResps.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => - b0 + b1 - } + laneResps + .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) } + .reduce { (b0, b1) => + b0 + b1 + } numReqs := numReqs + numReqsThisCycle numResps := numResps + numRespsThisCycle reqBytes := reqBytes + reqBytesThisCycle @@ -1496,7 +1604,10 @@ class MemTraceLogger( // // This is a clunky workaround of the fact that Chisel doesn't allow partial // assignment to a 
bitfield range of a wide signal. - def flattenTrace(simIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = { + def flattenTrace( + simIO: Bundle with HasTraceLine, + perLane: Vec[TraceLine] + ) = { // these will get optimized out val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid))) val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source))) @@ -1592,8 +1703,14 @@ object TLPrintf { tlData: UInt, reqData: UInt ) = { - printf(s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d", - source, address, size, mask, is_store) + printf( + s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d", + source, + address, + size, + mask, + is_store + ) when(is_store) { printf(", tlData=%x, reqData=%x", tlData, reqData) } @@ -1604,7 +1721,7 @@ object TLPrintf { // Synthesizable unit tests class DummyDriver(config: CoalescerConfig)(implicit p: Parameters) - extends LazyModule { + extends LazyModule { val laneNodes = Seq.tabulate(config.numLanes) { i => val clientParam = Seq( TLMasterParameters.v1( @@ -1640,7 +1757,10 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig) // generate dummy traffic to coalescer to prevent it from being optimized // out during synthesis val address = Wire(UInt(config.addressWidth.W)) - address := Cat((finishCounter + (lane.U % 3.U)), 0.U(config.wordSizeWidth.W)) + address := Cat( + (finishCounter + (lane.U % 3.U)), + 0.U(config.wordSizeWidth.W) + ) val (tl, edge) = node.out(0) val (legal, bits) = edge.Put( fromSource = sourceIdCounter, @@ -1657,11 +1777,13 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig) tl.e.valid := false.B } - val dataSum = outer.laneNodes.map { node => - val tl = node.out(0)._1 - val data = Mux(tl.d.valid, tl.d.bits.data, 0.U) - data - }.reduce (_ +& _) + val dataSum = outer.laneNodes + .map { node => + val tl = node.out(0)._1 + val data = Mux(tl.d.valid, tl.d.bits.data, 0.U) + data + } + .reduce(_ +& _) // this doesn't make much sense, but it prevents 
the entire uncoalescer from // being optimized away finishCounter := finishCounter + dataSum @@ -1680,8 +1802,10 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule { // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // edges globally, by way of Diplomacy communicating the TL slave // parameters to the upstream nodes. - new TLRAM(address = AddressSet(0x0000, 0xffffff), - beatBytes = (1 << config.dataBusWidth)) + new TLRAM( + address = AddressSet(0x0000, 0xffffff), + beatBytes = (1 << config.dataBusWidth) + ) ) ) @@ -1704,7 +1828,8 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) } // tracedriver --> coalescer --> tracelogger --> tlram -class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends LazyModule { +class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) + extends LazyModule { val numLanes = p(SIMTCoreKey).get.nLanes val config = defaultConfig.copy(numLanes = numLanes) @@ -1713,14 +1838,18 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz new MemTraceLogger(numLanes, filename, loggerName = "coreside") ) val coal = LazyModule(new CoalescingUnit(config)) - val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, filename, loggerName = "memside")) + val memSideLogger = LazyModule( + new MemTraceLogger(numLanes + 1, filename, loggerName = "memside") + ) val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge LazyModule( // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // edges globally, by way of Diplomacy communicating the TL slave // parameters to the upstream nodes. 
- new TLRAM(address = AddressSet(0x0000, 0xffffff), - beatBytes = (1 << config.dataBusWidth)) + new TLRAM( + address = AddressSet(0x0000, 0xffffff), + beatBytes = (1 << config.dataBusWidth) + ) ) ) @@ -1751,8 +1880,9 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz } } -class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit p: Parameters) - extends UnitTest(timeout) { +class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit + p: Parameters +) extends UnitTest(timeout) { val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module) dut.io.start := io.start io.finished := dut.io.finished @@ -1770,8 +1900,10 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // edges globally, by way of Diplomacy communicating the TL slave // parameters to the upstream nodes. - new TLRAM(address = AddressSet(0x0000, 0xffffff), - beatBytes = (1 << defaultConfig.dataBusWidth)) + new TLRAM( + address = AddressSet(0x0000, 0xffffff), + beatBytes = (1 << defaultConfig.dataBusWidth) + ) ) ) @@ -1785,13 +1917,13 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { } } -class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) { +class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) + extends UnitTest(timeout) { val dut = Module(LazyModule(new TLRAMCoalescer).module) dut.io.start := io.start io.finished := dut.io.finished } - //////////// //////////// //////////// @@ -1941,11 +2073,3 @@ class CoalescerXbarImpl(outer: CoalescerXbar, } - - - - - - - - From 772deda9c2e9fd6c76b85d8dd2e1693e7089753e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 16:20:01 -0700 Subject: [PATCH 05/10] Fix ChiselEnum experimental warning --- src/main/scala/tilelink/Coalescing.scala | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 
deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 0e72dae..98eb668 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -4,7 +4,6 @@ package freechips.rocketchip.tilelink import chisel3._ import chisel3.util._ -import chisel3.experimental.ChiselEnum import org.chipsalliance.cde.config.{Parameters, Field} import freechips.rocketchip.diplomacy._ // import freechips.rocketchip.devices.tilelink.TLTestRAM @@ -381,9 +380,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) // Software model: coalescer.py class MonoCoalescer( + config: CoalescerConfig, coalLogSize: Int, - windowT: CoalShiftQueue[NonCoalescedRequest], - config: CoalescerConfig + windowT: CoalShiftQueue[NonCoalescedRequest] ) extends Module { val io = IO(new Bundle { val window = Input(windowT.io.cloneType) @@ -532,9 +531,9 @@ class MonoCoalescer( // // Software model: coalescer.py class MultiCoalescer( + config: CoalescerConfig, windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, - config: CoalescerConfig ) extends Module { val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues @@ -549,7 +548,7 @@ class MultiCoalescer( }) val coalescers = config.coalLogSizes.map(size => - Module(new MonoCoalescer(size, windowT, config)) + Module(new MonoCoalescer(config, size, windowT)) ) coalescers.foreach(_.io.window := io.window) @@ -704,14 +703,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) ) val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits - // note we are using word size. 
assuming all coalescer inputs are word sized val reqQueueEntryT = new NonCoalescedRequest(config) val reqQueues = Module( new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config) ) val coalReqT = new CoalescedRequest(config) - val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config)) + val coalescer = Module(new MultiCoalescer(config, reqQueues, coalReqT)) coalescer.io.window := reqQueues.io reqQueues.io.coalescable := coalescer.io.coalescable reqQueues.io.invalidate := coalescer.io.invalidate @@ -955,7 +953,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) } class Uncoalescer(config: CoalescerConfig) extends Module { - // notes to hansung: + // Mapping to reference model param names // val numLanes: Int, <-> config.NUM_LANES // val numPerLaneReqs: Int, <-> config.DEPTH // val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS) From 0df319288275f3b683ff2301c921c1e84a0a225c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 17:31:51 -0700 Subject: [PATCH 06/10] Revamp uncoalescer IO Connect coalescer output directly to the uncoalescer at the toplevel, and do table entry construction entirely inside the module. WIP: unittest is very broken as a result of this. 
--- src/main/scala/tilelink/Coalescing.scala | 196 ++++++++++++----------- 1 file changed, 105 insertions(+), 91 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 98eb668..e7e69f8 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -41,6 +41,13 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum { } } +// Mapping to reference model param names +// numLanes: Int, <-> config.NUM_LANES +// numPerLaneReqs: Int, <-> config.DEPTH +// sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS) +// sizeWidth: Int, <-> config.sizeEnum.width +// coalDataWidth: Int, <-> (1 << config.MAX_SIZE) +// numInflightCoalRequests: Int <-> config.NUM_NEW_IDS case class CoalescerConfig( enable: Boolean, // globally enable or disable coalescing numLanes: Int, // number of lanes (or threads) in a warp @@ -137,7 +144,8 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La // Protocol-agnostic bundles that represent a request and a response to the // coalescer. 
-class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle { +class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) + extends Bundle { require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") val op = UInt(1.W) // 0=READ 1=WRITE val address = UInt(addressWidth.W) @@ -181,6 +189,7 @@ case class CoalescedRequest(config: CoalescerConfig) class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { + require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") val op = UInt(1.W) // 0=READ 1=WRITE val size = UInt(sizeWidth.W) val source = UInt(sourceWidth.W) @@ -382,10 +391,10 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) class MonoCoalescer( config: CoalescerConfig, coalLogSize: Int, - windowT: CoalShiftQueue[NonCoalescedRequest] + queueT: CoalShiftQueue[NonCoalescedRequest] ) extends Module { val io = IO(new Bundle { - val window = Input(windowT.io.cloneType) + val window = Input(queueT.io.cloneType) val results = Output(new Bundle { val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val baseAddr = Output(UInt(config.addressWidth.W)) @@ -478,7 +487,8 @@ class MonoCoalescer( val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts) val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux - // matchTable for the chosen lane, but converted to a Vec[UInt] + // matchTable for the chosen lane, but each column converted to bitflags, + // i.e. 
Vec[UInt] val chosenMatches = VecInit(matchTablePerLane.map { table => VecInit(table.map(VecInit(_).asUInt)) })(chosenLeaderIdx) @@ -532,23 +542,25 @@ class MonoCoalescer( // Software model: coalescer.py class MultiCoalescer( config: CoalescerConfig, - windowT: CoalShiftQueue[NonCoalescedRequest], + queueT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, ) extends Module { + val invalidateT = Valid(Vec(config.numLanes, UInt(config.queueDepth.W))) val io = IO(new Bundle { // coalescing window, connected to the contents of the request queues - val window = Input(windowT.io.cloneType) + val window = Input(queueT.io.cloneType) // generated coalesced request val coalReq = DecoupledIO(coalReqT.cloneType) - // invalidate signals going into each request queue's head - val invalidate = - Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) - // whether a lane is coalescable + // invalidate signals going into each request queue's head. Lanes with + // high invalidate bits are what became coalesced into the new request. + val invalidate = Output(invalidateT) + // whether a lane is coalescable. This is used to output non-coalescable + // lanes to the arbiter so they can be flushed to downstream. 
val coalescable = Output(Vec(config.numLanes, Bool())) }) val coalescers = config.coalLogSizes.map(size => - Module(new MonoCoalescer(config, size, windowT)) + Module(new MonoCoalescer(config, size, queueT)) ) coalescers.foreach(_.io.window := io.window) @@ -701,11 +713,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + s"mismatch with config.addressWidth (${config.addressWidth})" ) + require( + config.maxCoalLogSize <= config.dataBusWidth, + "multi-beat coalesced reads/writes are currently not supported" + ) val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits - val reqQueueEntryT = new NonCoalescedRequest(config) + val nonCoalReqT = new NonCoalescedRequest(config) val reqQueues = Module( - new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config) + new CoalShiftQueue(nonCoalReqT, config.queueDepth, config) ) val coalReqT = new CoalescedRequest(config) @@ -725,7 +741,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) (outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach { case (((tlIn, _), (tlOut, edgeOut)), lane) => // Request queue - val req = Wire(reqQueueEntryT) + val req = Wire(nonCoalReqT) req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode) req.source := tlIn.a.bits.source @@ -782,6 +798,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. 
tlCoal.e.valid := false.B + require( + tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, + s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" + + s" (${(1 << config.dataBusWidth) * 8})" + ) + // =========================================================================== // Response flow // =========================================================================== @@ -870,57 +892,21 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) dontTouch(tlOut.d) } - // Construct new entry for the inflight table - // FIXME: don't instantiate inflight table entry type here. It leaks the table's impl - // detail to the coalescer - - // richard: I think a good idea is to pass Valid[ReqQueueEntry] generated by - // the coalescer directly into the uncoalescer, so that we can offload the - // logic to generate the Inflight Entry into the uncoalescer, where it should be. - // this also reduces top level clutter. - - val uncoalescer = Module(new Uncoalescer(config)) - - val newEntry = Wire(uncoalescer.inflightTable.entryT) - newEntry.source := coalescer.io.coalReq.bits.source - - assert( - config.maxCoalLogSize <= config.dataBusWidth, - "multi-beat coalesced reads/writes are currently not supported" - ) - assert( - tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, - s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" - + s" (${(1 << config.dataBusWidth) * 8})" + val uncoalescer = Module( + new Uncoalescer(config, reqQueues, coalReqT, coalescer.invalidateT) ) + // connect coalesced request that is newly generated and being recorded in + // the uncoalescer + uncoalescer.io.coalReq <> coalescer.io.coalReq + uncoalescer.io.invalidate := coalescer.io.invalidate val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) - // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the - // coalescer to every (numLanes * queueDepth) entry in the inflight table. 
- (newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex - .foreach { case ((laneEntry, laneInv), lane) => - (laneEntry.reqs zip laneInv.asBools).zipWithIndex - .foreach { case ((reqEntry, inv), i) => - val req = reqQueues.io.elts(lane)(i) - when((coalescer.io.invalidate.valid && inv)) { - printf( - s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", - req.source - ) - } - reqEntry.valid := (coalescer.io.invalidate.valid && inv) - reqEntry.source := req.source - reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth) - reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size) - // TODO: load/store op - } - } - dontTouch(newEntry) - - uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid - uncoalescer.io.newEntry := newEntry + uncoalescer.io.window := reqQueues.io + // connect coalesced response going into the uncoalescer, ready to be + // uncoalesced // Cleanup: custom <>? uncoalescer.io.coalResp.valid := tlCoal.d.valid uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits) + // uncoalescer backpressure tlCoal.d.ready := uncoalescer.io.coalResp.ready // Connect uncoalescer results back into each lane's response queue @@ -935,11 +921,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) ) q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits - // debug - // when (resp.valid) { - // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") - // } - // dontTouch(q.io.enq(respQueueCoalPortOffset)) + // debug + // when (resp.valid) { + // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") + // } + // dontTouch(q.io.enq(respQueueCoalPortOffset)) } } @@ -952,48 +938,76 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) dontTouch(tlCoal.d) } -class Uncoalescer(config: CoalescerConfig) extends Module { - // Mapping to reference model param names - // val numLanes: Int, <-> 
config.NUM_LANES - // val numPerLaneReqs: Int, <-> config.DEPTH - // val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS) - // val sizeWidth: Int, <-> config.sizeEnum.width - // val coalDataWidth: Int, <-> (1 << config.MAX_SIZE) - // val numInflightCoalRequests: Int <-> config.NUM_NEW_IDS +class Uncoalescer( + config: CoalescerConfig, + queueT: CoalShiftQueue[NonCoalescedRequest], + coalReqT: Request, + coalInvalidateT: Valid[Vec[UInt]], +) extends Module { val inflightTable = Module(new InflightCoalReqTable(config)) val io = IO(new Bundle { - val coalReqValid = Input(Bool()) - // FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer - val newEntry = Input(inflightTable.entryT.cloneType) + // generated coalesced request, connected to the output of the coalescer. + val coalReq = Flipped(DecoupledIO(coalReqT.cloneType)) + // invalidate signal coming out of coalescer. + val invalidate = Input(coalInvalidateT.cloneType) + // coalescing window, connected to the contents of the request queues. + // Uncoalescer looks at the queue entries that got coalesced into `coalReq` + // in order to record which lanes this coalReq originally came from. + val window = Input(queueT.io.cloneType) val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val uncoalResps = Output( Vec( config.numLanes, - Vec( - config.queueDepth, - ValidIO( - new NonCoalescedResponse(config) - ) - ) + Vec(config.queueDepth, ValidIO(new NonCoalescedResponse(config))) ) ) }) - // Populate inflight table - inflightTable.io.enq.valid := io.coalReqValid - inflightTable.io.enq.bits := io.newEntry + // Uncoalescer has to be always ready to accept and record new coalesced + // requests, so that it doesn't stall the coalescer. 
+ io.coalReq.ready := true.B + + // Construct a new entry for the inflight table using generated coalesced request + def generateInflightTableEntry: InflightCoalReqTableEntry = { + val newEntry = Wire(inflightTable.entryT) + newEntry.source := io.coalReq.bits.source + // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the + // coalescer to every (numLanes * queueDepth) entry in the inflight table. + (newEntry.lanes zip io.invalidate.bits).zipWithIndex + .foreach { case ((laneEntry, laneInv), lane) => + (laneEntry.reqs zip laneInv.asBools).zipWithIndex + .foreach { case ((reqEntry, inv), i) => + val req = io.window.elts(lane)(i) + when((io.invalidate.valid && inv)) { + printf( + s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", + req.source + ) + } + reqEntry.valid := (io.invalidate.valid && inv) + reqEntry.source := req.source + reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth) + reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size) + // TODO: load/store op + } + } + assert( + !((io.coalReq.valid === true.B) && (io.coalResp.valid === true.B) && + (newEntry.source === io.coalResp.bits.source)), + "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled" + ) + dontTouch(newEntry) + + newEntry + } + inflightTable.io.enq.valid := io.coalReq.valid + inflightTable.io.enq.bits := generateInflightTableEntry // Look up the table with incoming coalesced responses inflightTable.io.lookup.ready := io.coalResp.valid inflightTable.io.lookupSourceId := io.coalResp.bits.source io.coalResp.ready := true.B // FIXME, see sw model implementation - assert( - !((io.coalReqValid === true.B) && (io.coalResp.valid === true.B) && - (io.newEntry.source === io.coalResp.bits.source)), - "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled" - ) - // Un-coalescing logic // def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, 
logSize: UInt): UInt = { From df68bfec844b11dfb59a0ef61c2bfdb381468e30 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 18:20:19 -0700 Subject: [PATCH 07/10] Remove module dependency for uncoalescer instantiation for easier unittesting. now builds. --- src/main/scala/tilelink/Coalescing.scala | 18 +- .../scala/coalescing/CoalescingUnitTest.scala | 218 ++++++++++-------- 2 files changed, 129 insertions(+), 107 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index e7e69f8..f309b9f 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -893,14 +893,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) } val uncoalescer = Module( - new Uncoalescer(config, reqQueues, coalReqT, coalescer.invalidateT) + new Uncoalescer(config, nonCoalReqT, coalReqT) ) // connect coalesced request that is newly generated and being recorded in // the uncoalescer uncoalescer.io.coalReq <> coalescer.io.coalReq uncoalescer.io.invalidate := coalescer.io.invalidate val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) - uncoalescer.io.window := reqQueues.io + uncoalescer.io.windowElts := reqQueues.io.elts // connect coalesced response going into the uncoalescer, ready to be // uncoalesced // Cleanup: custom <>? @@ -940,20 +940,22 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) class Uncoalescer( config: CoalescerConfig, - queueT: CoalShiftQueue[NonCoalescedRequest], - coalReqT: Request, - coalInvalidateT: Valid[Vec[UInt]], + nonCoalReqT: NonCoalescedRequest, + coalReqT: CoalescedRequest, ) extends Module { val inflightTable = Module(new InflightCoalReqTable(config)) val io = IO(new Bundle { // generated coalesced request, connected to the output of the coalescer. val coalReq = Flipped(DecoupledIO(coalReqT.cloneType)) // invalidate signal coming out of coalescer. 
- val invalidate = Input(coalInvalidateT.cloneType) + val invalidate = Input(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) // coalescing window, connected to the contents of the request queues. // Uncoalescer looks at the queue entries that got coalesced into `coalReq` // in order to record which lanes this coalReq originally came from. - val window = Input(queueT.io.cloneType) + // We only care about window.elts because the coalescer would have made + // sure it only looked at the valid entries. + // TODO: duplicate type construction + val windowElts = Input(Vec(config.numLanes, Vec(config.queueDepth, nonCoalReqT))) val coalResp = Flipped(Decoupled(new CoalescedResponse(config))) val uncoalResps = Output( Vec( @@ -977,7 +979,7 @@ class Uncoalescer( .foreach { case ((laneEntry, laneInv), lane) => (laneEntry.reqs zip laneInv.asBools).zipWithIndex .foreach { case ((reqEntry, inv), i) => - val req = io.window.elts(lane)(i) + val req = io.windowElts(lane)(i) when((io.invalidate.valid && inv)) { printf( s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 29ea8dc..36f8b13 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -735,125 +735,145 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { val coalDataWidth = 128 val numInflightCoalRequests = 4 + val config = uncoalescerTestConfig + + val nonCoalReqT = new NonCoalescedRequest(config) + val coalReqT = new CoalescedRequest(config) it should "work in general case" in { - test(new Uncoalescer(uncoalescerTestConfig)) + test(new Uncoalescer(config, nonCoalReqT, coalReqT)) // vcs helps with simulation time, but sometimes errors with // "mutation occurred during iteration" java error // .withAnnotations(Seq(VcsBackendAnnotation)) { c => val sourceId = 0.U - val four = 
c.io.newEntry.sizeEnumT.FOUR - c.io.coalReqValid.poke(true.B) - c.io.newEntry.source.poke(sourceId) - c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(0).reqs(0).source.poke(1.U) - c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) - c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) - c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B) - c.io.newEntry.lanes(0).reqs(1).source.poke(2.U) - c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes - c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four) - c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B) - c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) - c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U) - c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) - c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B) - c.io.newEntry.lanes(2).reqs(1).source.poke(2.U) - c.io.newEntry.lanes(2).reqs(1).offset.poke(3.U) - c.io.newEntry.lanes(2).reqs(1).sizeEnum.poke(four) - c.io.newEntry.lanes(3).reqs(0).valid.poke(false.B) + // val four = c.io.newEntry.sizeEnumT.FOUR + c.io.coalReq.valid.poke(true.B) + c.io.windowElts(0)(0).op.poke(0.U) + c.io.windowElts(0)(0).source.poke(1.U) + c.io.windowElts(0)(0).address.poke(0x4.U) + c.io.windowElts(0)(0).size.poke(2.U) + c.io.windowElts(0)(1).op.poke(0.U) + c.io.windowElts(0)(1).source.poke(2.U) + c.io.windowElts(0)(1).address.poke(0x4.U) + c.io.windowElts(0)(1).size.poke(2.U) + c.io.windowElts(2)(0).op.poke(0.U) + c.io.windowElts(2)(0).source.poke(1.U) + c.io.windowElts(2)(0).address.poke(0x4.U) + c.io.windowElts(2)(0).size.poke(2.U) + c.io.windowElts(2)(1).op.poke(0.U) + c.io.windowElts(2)(1).source.poke(2.U) + c.io.windowElts(2)(1).address.poke(0x4.U) + c.io.windowElts(2)(1).size.poke(2.U) + // c.io.newEntry.source.poke(sourceId) + // c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(0).reqs(0).source.poke(1.U) + // c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) + // 
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B) + // c.io.newEntry.lanes(0).reqs(1).source.poke(2.U) + // c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes + // c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four) + // c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B) + // c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) + // c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U) + // c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B) + // c.io.newEntry.lanes(2).reqs(1).source.poke(2.U) + // c.io.newEntry.lanes(2).reqs(1).offset.poke(3.U) + // c.io.newEntry.lanes(2).reqs(1).sizeEnum.poke(four) + // c.io.newEntry.lanes(3).reqs(0).valid.poke(false.B) - c.clock.step() + // c.clock.step() - c.io.coalReqValid.poke(false.B) + // c.io.coalReqValid.poke(false.B) - c.clock.step() + // c.clock.step() - c.io.coalResp.valid.poke(true.B) - c.io.coalResp.bits.source.poke(sourceId) - val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) - // val lit = BigInt(0x0123456789abcdefL) - c.io.coalResp.bits.data.poke(lit.U) + // c.io.coalResp.valid.poke(true.B) + // c.io.coalResp.bits.source.poke(sourceId) + // val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) + // // val lit = BigInt(0x0123456789abcdefL) + // c.io.coalResp.bits.data.poke(lit.U) - // table lookup is combinational at the same cycle - c.io.uncoalResps(0)(0).valid.expect(true.B) - c.io.uncoalResps(1)(0).valid.expect(false.B) - c.io.uncoalResps(2)(0).valid.expect(true.B) - c.io.uncoalResps(3)(0).valid.expect(false.B) + // // table lookup is combinational at the same cycle + // c.io.uncoalResps(0)(0).valid.expect(true.B) + // c.io.uncoalResps(1)(0).valid.expect(false.B) + // c.io.uncoalResps(2)(0).valid.expect(true.B) + // c.io.uncoalResps(3)(0).valid.expect(false.B) - // offset is counting 
from LSB - c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(0)(0).bits.source.expect(1.U) - c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(0)(1).bits.source.expect(2.U) - c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) - c.io.uncoalResps(2)(0).bits.source.expect(2.U) - c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U) - c.io.uncoalResps(2)(1).bits.source.expect(2.U) + // // offset is counting from LSB + // c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(0)(0).bits.source.expect(1.U) + // c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(0)(1).bits.source.expect(2.U) + // c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) + // c.io.uncoalResps(2)(0).bits.source.expect(2.U) + // c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U) + // c.io.uncoalResps(2)(1).bits.source.expect(2.U) } } - it should "uncoalesce when coalesced to the same word offset" in { - test(new Uncoalescer(uncoalescerTestConfig)) - // .withAnnotations(Seq(VcsBackendAnnotation)) - { c => - val sourceId = 0.U - val four = c.io.newEntry.sizeEnumT.FOUR - c.io.coalReqValid.poke(true.B) - c.io.newEntry.source.poke(sourceId) - c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(0).reqs(0).source.poke(0.U) - c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) - c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) - c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B) - c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(1).reqs(0).source.poke(1.U) - c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U) - c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four) - c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B) - c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) - c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U) - c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) - 
c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B) - c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B) - c.io.newEntry.lanes(3).reqs(0).source.poke(3.U) - c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U) - c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four) - c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B) + // it should "uncoalesce when coalesced to the same word offset" in { + // test(new Uncoalescer(uncoalescerTestConfig)) + // // .withAnnotations(Seq(VcsBackendAnnotation)) + // { c => + // val sourceId = 0.U + // val four = c.io.newEntry.sizeEnumT.FOUR + // c.io.coalReqValid.poke(true.B) + // c.io.newEntry.source.poke(sourceId) + // c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(0).reqs(0).source.poke(0.U) + // c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) + // c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B) + // c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(1).reqs(0).source.poke(1.U) + // c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U) + // c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B) + // c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) + // c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U) + // c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B) + // c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B) + // c.io.newEntry.lanes(3).reqs(0).source.poke(3.U) + // c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U) + // c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four) + // c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B) - c.clock.step() + // c.clock.step() - c.io.coalReqValid.poke(false.B) + // c.io.coalReqValid.poke(false.B) - c.clock.step() + // c.clock.step() - c.io.coalResp.valid.poke(true.B) - c.io.coalResp.bits.source.poke(sourceId) - val lit = (BigInt(0x0123456789abcdefL) << 
64) | BigInt(0x5ca1ab1edeadbeefL) - c.io.coalResp.bits.data.poke(lit.U) + // c.io.coalResp.valid.poke(true.B) + // c.io.coalResp.bits.source.poke(sourceId) + // val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) + // c.io.coalResp.bits.data.poke(lit.U) - // table lookup is combinational at the same cycle - // offset is counting from LSB - c.io.uncoalResps(0)(0).valid.expect(true.B) - c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(0)(0).bits.source.expect(0.U) - c.io.uncoalResps(0)(1).valid.expect(false.B) - c.io.uncoalResps(1)(0).valid.expect(true.B) - c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(1)(0).bits.source.expect(1.U) - c.io.uncoalResps(1)(1).valid.expect(false.B) - c.io.uncoalResps(2)(0).valid.expect(true.B) - c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(2)(0).bits.source.expect(2.U) - c.io.uncoalResps(2)(1).valid.expect(false.B) - c.io.uncoalResps(3)(0).valid.expect(true.B) - c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) - c.io.uncoalResps(3)(0).bits.source.expect(3.U) - c.io.uncoalResps(3)(1).valid.expect(false.B) - } - } + // // table lookup is combinational at the same cycle + // // offset is counting from LSB + // c.io.uncoalResps(0)(0).valid.expect(true.B) + // c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(0)(0).bits.source.expect(0.U) + // c.io.uncoalResps(0)(1).valid.expect(false.B) + // c.io.uncoalResps(1)(0).valid.expect(true.B) + // c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(1)(0).bits.source.expect(1.U) + // c.io.uncoalResps(1)(1).valid.expect(false.B) + // c.io.uncoalResps(2)(0).valid.expect(true.B) + // c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(2)(0).bits.source.expect(2.U) + // c.io.uncoalResps(2)(1).valid.expect(false.B) + // c.io.uncoalResps(3)(0).valid.expect(true.B) + // 
c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) + // c.io.uncoalResps(3)(0).bits.source.expect(3.U) + // c.io.uncoalResps(3)(1).valid.expect(false.B) + // } + // } } class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester { From b95b59cce02466f752a639eace9de555bb0e0116 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 18:30:15 -0700 Subject: [PATCH 08/10] Fix uncoalescer unittest --- .../scala/coalescing/CoalescingUnitTest.scala | 129 ++++++++---------- 1 file changed, 55 insertions(+), 74 deletions(-) diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 36f8b13..19205d4 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -706,34 +706,25 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { }*/ } -object uncoalescerTestConfig extends CoalescerConfig( - enable = true, - numLanes = 4, - queueDepth = 2, - waitTimeout = 8, - addressWidth = 24, - dataBusWidth = 5, - // watermark = 2, - wordSizeInBytes = 4, - numOldSrcIds = 16, - numNewSrcIds = 4, - respQueueDepth = 4, - coalLogSizes = Seq(4), - sizeEnum = DefaultInFlightTableSizeEnum, - numCoalReqs = 1, - numArbiterOutputPorts = 4, - bankStrideInBytes = 64, -) - class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "uncoalescer" - val numLanes = 4 - val numPerLaneReqs = 2 - val sourceWidth = 2 - val sizeWidth = 2 - // 16B coalescing size - val coalDataWidth = 128 - val numInflightCoalRequests = 4 + object uncoalescerTestConfig extends CoalescerConfig( + enable = true, + numLanes = 4, + queueDepth = 2, + waitTimeout = 8, + addressWidth = 24, + dataBusWidth = 4, // 128 bit data bus + wordSizeInBytes = 4, + numOldSrcIds = 16, + numNewSrcIds = 4, + respQueueDepth = 4, + coalLogSizes = Seq(4), + sizeEnum = DefaultInFlightTableSizeEnum, + numCoalReqs = 1, + numArbiterOutputPorts = 4, + 
bankStrideInBytes = 64, + ) val config = uncoalescerTestConfig @@ -745,72 +736,62 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { // "mutation occurred during iteration" java error // .withAnnotations(Seq(VcsBackendAnnotation)) { c => - val sourceId = 0.U - // val four = c.io.newEntry.sizeEnumT.FOUR - c.io.coalReq.valid.poke(true.B) + // 4 lanes, queue depth 2 c.io.windowElts(0)(0).op.poke(0.U) c.io.windowElts(0)(0).source.poke(1.U) c.io.windowElts(0)(0).address.poke(0x4.U) c.io.windowElts(0)(0).size.poke(2.U) c.io.windowElts(0)(1).op.poke(0.U) c.io.windowElts(0)(1).source.poke(2.U) - c.io.windowElts(0)(1).address.poke(0x4.U) + c.io.windowElts(0)(1).address.poke(0x4.U) // two reqs from one lane c.io.windowElts(0)(1).size.poke(2.U) c.io.windowElts(2)(0).op.poke(0.U) - c.io.windowElts(2)(0).source.poke(1.U) - c.io.windowElts(2)(0).address.poke(0x4.U) + c.io.windowElts(2)(0).source.poke(2.U) + c.io.windowElts(2)(0).address.poke(0x8.U) c.io.windowElts(2)(0).size.poke(2.U) c.io.windowElts(2)(1).op.poke(0.U) c.io.windowElts(2)(1).source.poke(2.U) - c.io.windowElts(2)(1).address.poke(0x4.U) + c.io.windowElts(2)(1).address.poke(0xc.U) c.io.windowElts(2)(1).size.poke(2.U) - // c.io.newEntry.source.poke(sourceId) - // c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(0).reqs(0).source.poke(1.U) - // c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) - // c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) - // c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B) - // c.io.newEntry.lanes(0).reqs(1).source.poke(2.U) - // c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes - // c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four) - // c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B) - // c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) - // c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U) - // c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) - // 
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B) - // c.io.newEntry.lanes(2).reqs(1).source.poke(2.U) - // c.io.newEntry.lanes(2).reqs(1).offset.poke(3.U) - // c.io.newEntry.lanes(2).reqs(1).sizeEnum.poke(four) - // c.io.newEntry.lanes(3).reqs(0).valid.poke(false.B) + // indicate lane 0 and 2 are used for coalescing + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits(0).poke(0x3.U) // 2'b11 for depth=2 + c.io.invalidate.bits(1).poke(0x0.U) + c.io.invalidate.bits(2).poke(0x3.U) + c.io.invalidate.bits(3).poke(0x0.U) - // c.clock.step() + val sourceId = 0.U + c.io.coalReq.valid.poke(true.B) + c.io.coalReq.bits.source.poke(sourceId) + c.io.coalReq.ready.expect(true.B) - // c.io.coalReqValid.poke(false.B) + c.clock.step() - // c.clock.step() + c.io.coalReq.valid.poke(false.B) - // c.io.coalResp.valid.poke(true.B) - // c.io.coalResp.bits.source.poke(sourceId) - // val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) - // // val lit = BigInt(0x0123456789abcdefL) - // c.io.coalResp.bits.data.poke(lit.U) + c.clock.step() - // // table lookup is combinational at the same cycle - // c.io.uncoalResps(0)(0).valid.expect(true.B) - // c.io.uncoalResps(1)(0).valid.expect(false.B) - // c.io.uncoalResps(2)(0).valid.expect(true.B) - // c.io.uncoalResps(3)(0).valid.expect(false.B) + c.io.coalResp.valid.poke(true.B) + c.io.coalResp.bits.source.poke(sourceId) + val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) + // val lit = BigInt(0x0123456789abcdefL) + c.io.coalResp.bits.data.poke(lit.U) - // // offset is counting from LSB - // c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(0)(0).bits.source.expect(1.U) - // c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(0)(1).bits.source.expect(2.U) - // c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) - // c.io.uncoalResps(2)(0).bits.source.expect(2.U) - // c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U) - // 
c.io.uncoalResps(2)(1).bits.source.expect(2.U) + // table lookup is combinational at the same cycle + c.io.uncoalResps(0)(0).valid.expect(true.B) + c.io.uncoalResps(1)(0).valid.expect(false.B) + c.io.uncoalResps(2)(0).valid.expect(true.B) + c.io.uncoalResps(3)(0).valid.expect(false.B) + + // offset is counting from LSB + c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(0)(0).bits.source.expect(1.U) + c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(0)(1).bits.source.expect(2.U) + c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) + c.io.uncoalResps(2)(0).bits.source.expect(2.U) + c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U) + c.io.uncoalResps(2)(1).bits.source.expect(2.U) } } From 226e1d2d84d245e1982606d9a9e4000cf802b8ed Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 18:30:15 -0700 Subject: [PATCH 09/10] Fix uncoalescer unittest even more --- .../scala/coalescing/CoalescingUnitTest.scala | 117 ++++++++++-------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 19205d4..80342a1 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -730,6 +730,7 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { val nonCoalReqT = new NonCoalescedRequest(config) val coalReqT = new CoalescedRequest(config) + it should "work in general case" in { test(new Uncoalescer(config, nonCoalReqT, coalReqT)) // vcs helps with simulation time, but sometimes errors with @@ -768,6 +769,7 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { c.clock.step() c.io.coalReq.valid.poke(false.B) + c.io.invalidate.valid.poke(false.B) c.clock.step() @@ -795,66 +797,71 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { } } - // it should "uncoalesce when 
coalesced to the same word offset" in { - // test(new Uncoalescer(uncoalescerTestConfig)) - // // .withAnnotations(Seq(VcsBackendAnnotation)) - // { c => - // val sourceId = 0.U - // val four = c.io.newEntry.sizeEnumT.FOUR - // c.io.coalReqValid.poke(true.B) - // c.io.newEntry.source.poke(sourceId) - // c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(0).reqs(0).source.poke(0.U) - // c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) - // c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) - // c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B) - // c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(1).reqs(0).source.poke(1.U) - // c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U) - // c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four) - // c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B) - // c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) - // c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U) - // c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) - // c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B) - // c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B) - // c.io.newEntry.lanes(3).reqs(0).source.poke(3.U) - // c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U) - // c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four) - // c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B) + it should "uncoalesce when coalesced to the same word offset" in { + test(new Uncoalescer(config, nonCoalReqT, coalReqT)) + // .withAnnotations(Seq(VcsBackendAnnotation)) + { c => + // 4 lanes, queue depth 2 + c.io.windowElts(0)(0).op.poke(0.U) + c.io.windowElts(0)(0).source.poke(0.U) + c.io.windowElts(0)(0).address.poke(0x4.U) + c.io.windowElts(0)(0).size.poke(2.U) + c.io.windowElts(1)(0).op.poke(0.U) + c.io.windowElts(1)(0).source.poke(1.U) + c.io.windowElts(1)(0).address.poke(0x4.U) // same address as lane 0, but from a different lane + c.io.windowElts(1)(0).size.poke(2.U) + c.io.windowElts(2)(0).op.poke(0.U) +
c.io.windowElts(2)(0).source.poke(2.U) + c.io.windowElts(2)(0).address.poke(0x4.U) + c.io.windowElts(2)(0).size.poke(2.U) + c.io.windowElts(3)(0).op.poke(0.U) + c.io.windowElts(3)(0).source.poke(3.U) + c.io.windowElts(3)(0).address.poke(0x4.U) + c.io.windowElts(3)(0).size.poke(2.U) + // indicate lanes used for coalescing + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits(0).poke(0x1.U) // 2'b01 for enabling head + c.io.invalidate.bits(1).poke(0x1.U) + c.io.invalidate.bits(2).poke(0x1.U) + c.io.invalidate.bits(3).poke(0x1.U) - // c.clock.step() + val sourceId = 0.U + c.io.coalReq.valid.poke(true.B) + c.io.coalReq.bits.source.poke(sourceId) + c.io.coalReq.ready.expect(true.B) - // c.io.coalReqValid.poke(false.B) + c.clock.step() - // c.clock.step() + c.io.coalReq.valid.poke(false.B) + c.io.invalidate.valid.poke(false.B) - // c.io.coalResp.valid.poke(true.B) - // c.io.coalResp.bits.source.poke(sourceId) - // val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) - // c.io.coalResp.bits.data.poke(lit.U) + c.clock.step() - // // table lookup is combinational at the same cycle - // // offset is counting from LSB - // c.io.uncoalResps(0)(0).valid.expect(true.B) - // c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(0)(0).bits.source.expect(0.U) - // c.io.uncoalResps(0)(1).valid.expect(false.B) - // c.io.uncoalResps(1)(0).valid.expect(true.B) - // c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(1)(0).bits.source.expect(1.U) - // c.io.uncoalResps(1)(1).valid.expect(false.B) - // c.io.uncoalResps(2)(0).valid.expect(true.B) - // c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(2)(0).bits.source.expect(2.U) - // c.io.uncoalResps(2)(1).valid.expect(false.B) - // c.io.uncoalResps(3)(0).valid.expect(true.B) - // c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) - // c.io.uncoalResps(3)(0).bits.source.expect(3.U) - // c.io.uncoalResps(3)(1).valid.expect(false.B) - 
// } - // } + c.io.coalResp.valid.poke(true.B) + c.io.coalResp.bits.source.poke(sourceId) + val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) + c.io.coalResp.bits.data.poke(lit.U) + + // table lookup is combinational at the same cycle + // offset is counting from LSB + c.io.uncoalResps(0)(0).valid.expect(true.B) + c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(0)(0).bits.source.expect(0.U) + c.io.uncoalResps(0)(1).valid.expect(false.B) + c.io.uncoalResps(1)(0).valid.expect(true.B) + c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(1)(0).bits.source.expect(1.U) + c.io.uncoalResps(1)(1).valid.expect(false.B) + c.io.uncoalResps(2)(0).valid.expect(true.B) + c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(2)(0).bits.source.expect(2.U) + c.io.uncoalResps(2)(1).valid.expect(false.B) + c.io.uncoalResps(3)(0).valid.expect(true.B) + c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(3)(0).bits.source.expect(3.U) + c.io.uncoalResps(3)(1).valid.expect(false.B) + } + } } class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester { From 9b7080a852788a71938c3ddad478c7df00a939ab Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 11 May 2023 18:50:47 -0700 Subject: [PATCH 10/10] Delete old inflight table unittest --- .../scala/coalescing/CoalescingUnitTest.scala | 135 ------------------ 1 file changed, 135 deletions(-) diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 80342a1..546abad 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -863,138 +863,3 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { } } } - -class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester { - behavior of "inflight coalesced request table" - val numLanes = 4 - val 
numPerLaneReqs = 2 - val sourceWidth = 2 - val entries = 4 - - val offsetBits = 4 - val sizeBits = 2 - - val inflightCoalReqTableEntry = - new InflightCoalReqTableEntry( - numLanes, - numPerLaneReqs, - sourceWidth, - offsetBits, - testConfig.sizeEnum - ) - - // it should "stop enqueueing when full" in { - // test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c => - // // fill up the table - // for (i <- 0 until entries) { - // val sourceId = i - // c.io.enq.ready.expect(true.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(sourceId.U) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - // c.io.lookup.ready.poke(false.B) - // c.clock.step() - // } - - // // now cannot enqueue any more - // c.io.enq.ready.expect(false.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(0.U) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - - // c.clock.step() - // c.io.enq.ready.expect(false.B) - - // // try to lookup all existing entries - // for (i <- 0 until entries) { - // val sourceId = i - // c.io.enq.valid.poke(false.B) - // c.io.lookup.ready.poke(true.B) - // c.io.lookupSourceId.poke(sourceId) - // c.io.lookup.valid.expect(true.B) - // c.io.lookup.bits.expect(sourceId) - // c.clock.step() - // } - - // // now the table should be empty - // for (i <- 0 until entries) { - // val sourceId = i - // c.io.enq.valid.poke(false.B) - // c.io.lookup.ready.poke(true.B) - // c.io.lookupSourceId.poke(sourceId) - // c.io.lookup.valid.expect(false.B) - // c.clock.step() - // } - // } - // } - // it should "lookup matching entry" in { - // test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) - // .withAnnotations(Seq(WriteVcdAnnotation)) { c => - // c.reset.poke(true.B) - // c.clock.step(10) - // c.reset.poke(false.B) - - // // enqueue one entry to not match at 0th index - // c.io.enq.ready.expect(true.B) - // 
c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(0.U) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - - // c.clock.step() - - // val targetSourceId = 1.U - // c.io.enq.ready.expect(true.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(targetSourceId) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - - // c.clock.step() - - // c.io.lookup.ready.poke(true.B) - // c.io.lookupSourceId.poke(targetSourceId) - // c.io.lookup.valid.expect(true.B) - // c.io.lookup.bits.expect(targetSourceId) - - // c.clock.step() - - // // test if matching entry dequeues after 1 cycle - // c.io.lookup.ready.poke(true.B) - // c.io.lookupSourceId.poke(targetSourceId) - // c.io.lookup.valid.expect(false.B) - // } - // } - // it should "handle lookup and enqueue at the same time" in { - // test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c => - // // fill up the table - // val targetSourceId = 1.U - // c.io.enq.ready.expect(true.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(0.U) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - // c.clock.step() - // c.io.enq.ready.expect(true.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(targetSourceId) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - // c.clock.step() - - // // do both enqueue and lookup at the same cycle - // val enqSourceId = 2.U - // c.io.enq.ready.expect(true.B) - // c.io.enq.valid.poke(true.B) - // c.io.enq.bits.fromLane.poke(0.U) - // c.io.enq.bits.respSourceId.poke(enqSourceId) - // c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) } - // c.io.lookup.ready.poke(true.B) - // c.io.lookupSourceId.poke(targetSourceId) - - // c.clock.step() - // } - // } -}