From ccf9b95fb5dab8e43f0df7693a39e0805418b7b8 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 23 Apr 2023 13:37:10 -0700 Subject: [PATCH] Create custom response bundle to decouple from TileLink ... and easier unit testing. --- src/main/scala/tilelink/Coalescing.scala | 31 ++- .../scala/coalescing/CoalescingUnitTest.scala | 262 +++++++++--------- 2 files changed, 154 insertions(+), 139 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 1c6d0d3..996a24a 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -327,7 +327,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends tlCoal.a.bits := coalescer.io.out_req.bits.toTLA(edgeCoal) tlCoal.b.ready := true.B tlCoal.c.valid := false.B - tlCoal.d.ready := true.B + // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. tlCoal.e.valid := false.B @@ -475,13 +475,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends dontTouch(newEntry) // Uncoalescer module uncoalesces responses back to each lane - val uncoalescer = Module(new UncoalescingUnit(config, tlCoal.d.bits.cloneType)) + val uncoalescer = Module(new UncoalescingUnit(config)) uncoalescer.io.coalReqValid := coalescer.io.out_req.valid uncoalescer.io.newEntry := newEntry - // richard: I changed this to use the DecoupledIO interface, which TL is using, - // previously we were not handling the ready signal - uncoalescer.io.coalResp <> tlCoal.d + // Cleanup: custom <>? + uncoalescer.io.coalResp.valid := tlCoal.d.valid + uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source + uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data + tlCoal.d.ready := uncoalescer.io.coalResp.ready // Queue up synthesized uncoalesced responses into each lane's response queue (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) => @@ -507,7 +509,19 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends dontTouch(tlCoal.d) } -class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Module { +// Protocol-agnostic bundle that represents a coalesced response. +// +// Having this makes it easier to: +// * do unit tests -- no need to deal with TileLink in the chiseltest code +// * adapt coalescer to custom protocols like a custom L1 cache interface. +// +// FIXME: overlaps with RespQueueEntry. Trait-ify +class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle { + val source = UInt(log2Ceil(config.NUM_NEW_IDS).W) + val data = UInt((8 * (1 << config.MAX_SIZE)).W) +} + +class UncoalescingUnit(config: CoalescerConfig) extends Module { // notes to hansung: // val numLanes: Int, <-> config.NUM_LANES // val numPerLaneReqs: Int, <-> config.DEPTH @@ -518,8 +532,9 @@ class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Modu val inflightTable = Module(new InflightCoalReqTable(config)) val io = IO(new Bundle { val coalReqValid = Input(Bool()) + // FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer val newEntry = Input(inflightTable.entryT.cloneType) - val coalResp = Flipped(Decoupled(tlCoalD)) + val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config))) val uncoalResps = Output( Vec( config.NUM_LANES, @@ -1239,7 +1254,7 @@ class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters) class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { // TODO: use parameters for numLanes val numLanes = 4 - val filename = "test.trace" + val filename = "vecadd.core1.thread4.trace" val coal = LazyModule(new CoalescingUnit(defaultConfig)) val driver = LazyModule(new MemTraceDriver(defaultConfig, filename)) val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 5f722c5..61d1de9 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -36,216 +36,220 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work like normal shiftqueue when no invalidate" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.deq.ready.poke(false.B) + c.io.queue.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) c.clock.step() - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x56.U) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x56.U) c.clock.step() - c.io.enq.valid.poke(false.B) + c.io.queue.enq.valid.poke(false.B) - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x12.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x12.U) c.clock.step() - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x34.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) c.clock.step() // enqueue in the middle - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x78.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x78.U) c.clock.step() - c.io.enq.valid.poke(false.B) - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x56.U) + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x56.U) c.clock.step() - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x78.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x78.U) c.clock.step() // should be emptied - c.io.deq.valid.expect(false.B) + c.io.queue.deq.valid.expect(false.B) } } it should "work when enqueing and dequeueing simultaneously" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.invalidate.poke(0.U) + c.io.invalidate.valid.poke(false.B) // prepare - c.io.deq.ready.poke(true.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // enqueue and dequeue simultaneously - c.io.deq.ready.poke(true.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x12.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x12.U) c.clock.step() // dequeueing back-to-back should work without any holes in the middle - c.io.deq.ready.poke(true.B) - c.io.enq.valid.poke(false.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x34.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) c.clock.step() // make sure is empty - c.io.deq.ready.poke(true.B) - c.io.enq.valid.poke(false.B) - c.io.deq.valid.expect(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.valid.expect(false.B) } } it should "work when enqueing and dequeueing simultaneously to a full queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => - c.io.invalidate.poke(0.U) + c.io.invalidate.valid.poke(false.B) // prepare - c.io.deq.ready.poke(true.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // enqueue and dequeue simultaneously - c.io.deq.ready.poke(true.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x12.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x12.U) c.clock.step() // enqueue and dequeue simultaneously once more - c.io.deq.ready.poke(true.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x56.U) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x34.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x56.U) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) c.clock.step() // dequeueing back-to-back should work without any holes in the middle - c.io.deq.ready.poke(true.B) - c.io.enq.valid.poke(false.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x56.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x56.U) c.clock.step() // make sure is empty - c.io.deq.ready.poke(true.B) - c.io.enq.valid.poke(false.B) - c.io.deq.valid.expect(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.valid.expect(false.B) } } it should "invalidate head being dequeued" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.invalidate.poke(0.U) + c.io.invalidate.valid.poke(false.B) // prepare - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) c.clock.step() - c.io.enq.valid.poke(false.B) + c.io.queue.enq.valid.poke(false.B) // invalidate should work for the head just being dequeued at the same // cycle. However, it should not change deq.valid right away to avoid // combinational cycles (see definition). - c.io.invalidate.poke(0x1.U) - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits.poke(0x1.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) c.clock.step() // 0x12 should have been dequeued - c.io.invalidate.poke(0.U) - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x34.U) + c.io.invalidate.valid.poke(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) } } it should "dequeue invalidated entries by itself" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.invalidate.poke(0.U) + c.io.invalidate.valid.poke(false.B) // prepare - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) c.clock.step() - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x56.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x56.U) c.clock.step() - c.io.enq.valid.poke(false.B) + c.io.queue.enq.valid.poke(false.B) // invalidate two entries at head - c.io.invalidate.poke(0x3.U) + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits.poke(0x3.U) c.clock.step() // 0x12 should have been dequeued now - c.io.invalidate.poke(0x0.U) - c.io.deq.ready.poke(false.B) + c.io.invalidate.valid.poke(false.B) + c.io.queue.deq.ready.poke(false.B) c.clock.step() // 0x34 should have been dequeued now - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x56.U) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x56.U) c.clock.step() - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(false.B) } } it should "overwrite invalidated tail when enqueuing" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => - c.io.invalidate.poke(0.U) + c.io.invalidate.valid.poke(false.B) + c.io.invalidate.bits.poke(0.U) // prepare - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x12.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // invalidate and enqueue at the tail at the same time - c.io.invalidate.poke(0x1.U) - c.io.deq.ready.poke(false.B) - c.io.enq.ready.expect(true.B) - c.io.enq.valid.poke(true.B) - c.io.enq.bits.poke(0x34.U) + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits.poke(0x1.U) + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) c.clock.step() - c.io.invalidate.poke(0x0.U) - c.io.enq.valid.poke(false.B) + c.io.invalidate.valid.poke(false.B) + c.io.queue.enq.valid.poke(false.B) // now should be able to dequeue immediately as tail is overwritten - c.io.deq.ready.poke(true.B) - c.io.deq.valid.expect(true.B) - c.io.deq.bits.expect(0x34) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34) } } } @@ -260,15 +264,11 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { val coalDataWidth = 128 val numInflightCoalRequests = 4 + val hey = new TLBundleParameters(64, 64, 64, 64, 64) it should "work" in { test( new UncoalescingUnit( - numLanes, - numPerLaneReqs, - sourceWidth, - sizeWidth, - coalDataWidth, - numInflightCoalRequests, + defaultConfig, ) ) // vcs helps with simulation time, but sometimes errors with