From 997b421c4256552ff95a6bfb5eec9f9a4465c16d Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Tue, 2 May 2023 00:07:45 -0700 Subject: [PATCH] active byte lane implementation for multi coalescer & add one shift queue test --- src/main/scala/tilelink/Coalescing.scala | 25 +++- .../scala/coalescing/CoalescingUnitTest.scala | 120 ++++++++++-------- 2 files changed, 84 insertions(+), 61 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 5f0fc56..3cab524 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -52,6 +52,7 @@ case class CoalescerConfig( coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers // each size is log(byteSize) sizeEnum: InFlightTableSizeEnum, + arbiterOutputs: Int ) { // maximum coalesced size def maxCoalLogSize: Int = coalLogSizes.max @@ -70,13 +71,25 @@ object defaultConfig extends CoalescerConfig( numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(3), - sizeEnum = DefaultInFlightTableSizeEnum + sizeEnum = DefaultInFlightTableSizeEnum, + arbiterOutputs = 4 ) class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends LazyModule { - // Identity node that captures the incoming TL requests and passes them - // through the other end, dropping coalesced requests. This node is what - // will be visible to upstream and downstream nodes. + // Nexus node that captures the incoming TL requests, rewrites coalescable requests, + // and arbitrates between non-coalesced and coalesced requests to a fix number of outputs + // before sending it out to memory. This node is what's visible to upstream and downstream nodes. + + // WIP: +// val node = TLNexusNode( +// clientFn = c => c.head, +// managerFn = m => m.head // assuming arbiter generated ids are distinct between edges +// ) +// node.in.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.wordSizeInBytes, +// s"input edges into coalescer node does not have beatBytes = ${config.wordSizeInBytes}")) +// node.out.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.maxCoalLogSize, +// s"output edges into coalescer node does not have beatBytes = ${config.maxCoalLogSize}")) + val node = TLIdentityNode() // Number of maximum in-flight coalesced requests. The upper bound of this @@ -452,7 +465,9 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE for (i <- 0 until maxWords) { val sel = flatReqs.zip(flatMatches).map { case (req, m) => - m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U) + // note: ANDing against addrMask is to conform to active byte lanes requirements + // if aligning to LSB suffices, we should add the bitwise AND back + m && ((req.address(config.maxCoalLogSize - 1, 0)/* & addrMask*/) === i.U) } // TODO: SW uses priority encoder, not sure about behavior of MuxCase data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) => diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index bcd657e..7d7146d 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -114,7 +114,8 @@ object testConfig extends CoalescerConfig( numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(3), - sizeEnum = DefaultInFlightTableSizeEnum + sizeEnum = DefaultInFlightTableSizeEnum, + arbiterOutputs = 4 ) class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { @@ -216,29 +217,28 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "request shift queues" + def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { + ((c.io.queue.enq zip bits) zip valids).foreach { case ((enq, ent), valid) => + enq.ready.expect(true.B) + enq.valid.poke(valid) + enq.bits.poke(ent) + } + c.clock.step() + } + + def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { + ((c.io.queue.deq zip bits) zip valids).foreach { case ((deq, ent), valid) => + deq.valid.expect(valid) + deq.bits.expect(ent) + } + } + + def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = { + (vec zip value).foreach { case (a, b) => a.poke(b) } + } + it should "work like normal shiftqueue when no invalidate" in { -// new CoalShiftQueue(0.U,4, testConfig) - def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { - ((c.io.queue.enq zip bits) zip valids).foreach { case ((enq, ent), valid) => - enq.ready.expect(true.B) - enq.valid.poke(valid) - enq.bits.poke(ent) - } - c.clock.step() - } - - def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { - ((c.io.queue.deq zip bits) zip valids).foreach { case ((deq, ent), valid) => - deq.valid.expect(valid) - deq.bits.expect(ent) - } - } - - def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = { - (vec zip value).foreach { case (a, b) => a.poke(b) } - } - test(new CoalShiftQueue(UInt(8.W),4, testConfig)) { c => c.io.coalescable.foreach(_.poke(true.B)) c.io.queue.deq.foreach(_.ready.poke(false.B)) @@ -322,42 +322,49 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) c.clock.step() -// attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B)) + attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B)) + } + } + + it should "work when enqueing and dequeueing simultaneously" in { + test(new CoalShiftQueue(UInt(8.W), 4, testConfig)) { c => + c.io.invalidate.valid.poke(false.B) + + c.io.coalescable.foreach(_.poke(true.B)) + c.io.queue.deq.foreach(_.ready.poke(false.B)) + + attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) + + // mark for dequeue + c.io.coalescable.foreach(_.poke(false.B)) + c.io.queue.deq.foreach(_.ready.poke(true.B)) + expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) + attemptEnqueue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) + + expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(false.B)) + attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) + + expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) + attemptEnqueue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B)) + + expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(false.B)) + attemptEnqueue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) + + expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) + c.clock.step() + expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(false.B)) + c.clock.step() + expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B)) + c.clock.step() + expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(false.B)) + c.clock.step() + expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) + c.clock.step() + expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(false.B)) + c.clock.step() } } /* - it should "work when enqueing and dequeueing simultaneously" in { - test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.invalidate.valid.poke(false.B) - c.io.allowShift.poke(true.B) - - // prepare - c.io.queue.deq.ready.poke(true.B) - c.io.queue.enq.ready.expect(true.B) - c.io.queue.enq.valid.poke(true.B) - c.io.queue.enq.bits.poke(0x12.U) - c.clock.step() - // enqueue and dequeue simultaneously - c.io.queue.deq.ready.poke(true.B) - c.io.queue.enq.ready.expect(true.B) - c.io.queue.enq.valid.poke(true.B) - c.io.queue.enq.bits.poke(0x34.U) - c.io.queue.deq.valid.expect(true.B) - c.io.queue.deq.bits.expect(0x12.U) - c.clock.step() - // dequeueing back-to-back should work without any holes in the middle - c.io.queue.deq.ready.poke(true.B) - c.io.queue.enq.valid.poke(false.B) - c.io.queue.deq.valid.expect(true.B) - c.io.queue.deq.bits.expect(0x34.U) - c.clock.step() - // make sure is empty - c.io.queue.deq.ready.poke(true.B) - c.io.queue.enq.valid.poke(false.B) - c.io.queue.deq.valid.expect(false.B) - } - } - it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => c.io.invalidate.valid.poke(false.B) @@ -566,7 +573,8 @@ object uncoalescerTestConfig extends CoalescerConfig( numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(4), - sizeEnum = DefaultInFlightTableSizeEnum + sizeEnum = DefaultInFlightTableSizeEnum, + arbiterOutputs = 4 ) class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {