package freechips.rocketchip.tilelink.coalescing import chisel3._ import chiseltest._ import chiseltest.simulator.VerilatorFlags import org.scalatest.flatspec.AnyFlatSpec import freechips.rocketchip.tilelink._ import freechips.rocketchip.util.MultiPortQueue import freechips.rocketchip.diplomacy._ import freechips.rocketchip.subsystem.WithoutTLMonitors import org.chipsalliance.cde.config.Parameters import chisel3.util.{DecoupledIO, Valid} import chisel3.util.experimental.BoringUtils class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "MultiPortQueue" // This is really just to figure out how MultiPortQueue works it should "serialize at dequeue end" in { test(new MultiPortQueue(UInt(4.W), 3, 1, 3, 6)) .withAnnotations(Seq(WriteVcdAnnotation)) { c => c.io.enq(0).valid.poke(true.B) c.io.enq(0).bits.poke(11.U) c.io.enq(1).valid.poke(true.B) c.io.enq(1).bits.poke(15.U) c.io.enq(2).valid.poke(true.B) c.io.enq(2).bits.poke(7.U) c.io.deq(0).ready.poke(true.B) c.clock.step() // c.io.enq(0).valid.poke(false.B) // c.io.enq(1).valid.poke(false.B) for (_ <- 0 until 100) { c.clock.step() } // c.io.deq(0).valid.expect(false.B) } } } class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule { val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ => TLClientNode( Seq( TLMasterPortParameters.v1( Seq( TLClientParameters( name = "processor-nodes", sourceId = IdRange(0, testConfig.numOldSrcIds), visibility = Seq(AddressSet(0x0, 0xffffff)) ) ) ) ) ) // 24 bit address space (TODO probably use testConfig) } val device = new SimpleDevice("dummy", Seq("dummy")) val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus val l2Nodes = Seq.tabulate(5) { _ => TLManagerNode( Seq( TLSlavePortParameters.v1( Seq( TLManagerParameters( address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode resources = device.reg, regionType = RegionType.UNCACHED, executable = true, supportsArithmetic = TransferSizes(1, beatBytes), supportsLogical = TransferSizes(1, beatBytes), supportsGet = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes), supportsPutPartial = TransferSizes(1, beatBytes), supportsHint = TransferSizes(1, beatBytes), fifoId = Some(0) ) ), beatBytes ) ) ) } val dut = LazyModule(new CoalescingUnit(testConfig)) cpuNodes.foreach(dut.cpuNode := _) l2Nodes.foreach(_ := dut.aggregateNode) lazy val module = new DummyCoalescingUnitTBImp(this) } class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleImp(outer) { val coal = outer.dut // FIXME: these need to be separate variables because of implicit naming in makeIOs // there has to be a better way val coalIO0 = outer.cpuNodes(0).makeIOs() val coalIO1 = outer.cpuNodes(1).makeIOs() val coalIO2 = outer.cpuNodes(2).makeIOs() val coalIO3 = outer.cpuNodes(3).makeIOs() val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3) val l2IO0 = outer.l2Nodes(0).makeIOs() val l2IO1 = outer.l2Nodes(1).makeIOs() val l2IO2 = outer.l2Nodes(2).makeIOs() val l2IO3 = outer.l2Nodes(3).makeIOs() val l2IO4 = outer.l2Nodes(4).makeIOs() val l2IOs = Seq(l2IO0, l2IO1, l2IO2, l2IO3, l2IO4) // val coalMasterNode = coal.coalescerNode.makeIOs() private val reqQueues = coal.module.reqQueues private val coalescer = coal.module.coalescer // workaround for peeking internal signals as outlined in // https://github.com/ucb-bar/chiseltest/issues/17 private val peekIn = Seq( reqQueues.io.queue.enq.map(_.ready), reqQueues.io.queue.enq.map(_.bits), reqQueues.io.queue.enq.map(_.valid), reqQueues.io.queue.deq.map(_.bits), reqQueues.io.queue.deq.map(_.valid), coalescer.io.coalReq.ready, coalescer.io.coalReq.bits, coalescer.io.coalReq.valid, coalescer.io.invalidate, ) val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[Request]].map(x => IO(x.cloneType)) val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[Request]].map(x => IO(Output(x.cloneType))) val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType))) val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType)) val coalReqBits = IO(Output(peekIn(6).asInstanceOf[Request].cloneType)) val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType)) val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType)) private val peekOut = Seq( reqQueueEnqReady, reqQueueEnqBits, reqQueueEnqValid, reqQueueDeqBits, reqQueueDeqValid, coalReqReady, coalReqBits, coalReqValid, coalInvalidate, ) (peekIn zip peekOut).foreach { case (inner: IndexedSeq[Data], outer: Seq[Data]) => (inner zip outer).foreach { case (i, o) => BoringUtils.bore(i, Seq(o)) } case (inner: Data, outer: Data) => BoringUtils.bore(inner, Seq(outer)) case _ => assert(false, "boring between different data types") } private val pokeIn = Seq( reqQueues.io.queue.deq.map(_.ready), // coalescer.io.coalReq.ready ) val reqQueueDeqReady = pokeIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) private val pokeOut = Seq( reqQueueDeqReady ) // TODO: doesn't work yet (pokeIn zip pokeOut).foreach { case (inner: IndexedSeq[Data], outer: Seq[Data]) => (inner zip outer).foreach { case (i, o) => BoringUtils.bore(i, Seq(o)) } case (inner: Data, outer: Data) => BoringUtils.bore(inner, Seq(outer)) case _ => assert(false, "boring between different data types") } } object testConfig extends CoalescerConfig( enable = true, numLanes = 4, queueDepth = 1, waitTimeout = 8, addressWidth = 24, dataBusWidth = 5, // watermark = 2, wordSizeInBytes = 4, numOldSrcIds = 16, numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(4, 5), sizeEnum = DefaultInFlightTableSizeEnum, numCoalReqs = 1, numArbiterOutputPorts = 4, bankStrideInBytes = 64 ) class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "multi- and mono-coalescers" implicit val p: Parameters = Parameters.empty def pokeA( nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Long, valid: Boolean = true, ): Unit = { val node = nodes(idx) node.a.ready.expect(true.B) node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get) node.a.bits.param.poke(0.U) node.a.bits.size.poke(size.U) node.a.bits.source.poke(source.U) node.a.bits.address.poke(addr.U) node.a.bits.mask.poke(mask.U) node.a.bits.data.poke(data.U) node.a.bits.corrupt.poke(false.B) node.a.valid.poke(valid.B) } def unsetA(nodes: Seq[TLBundle]): Unit = { nodes.foreach { node => node.a.valid.poke(false.B) } } def expectVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = { (vec zip value).foreach { case (a, b) => a.expect(b) } } it should "coalesce fully consecutive accesses at size 4, only once" in { test(LazyModule(new DummyCoalescingUnitTB()(new WithoutTLMonitors())).module) .withAnnotations(Seq(VerilatorBackendAnnotation, VerilatorFlags(Seq("--coverage-line")), WriteFstAnnotation)) // .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c => val nodes = c.coalIOs.map(_.head) c.l2IOs.foreach(_.head.a.ready.poke(true.B)) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111) pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222) pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333) pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444) expectVec(c.reqQueueEnqBits.map(_.data), Seq(0x1111.U, 0x2222.U, 0x3333.U, 0x4444.U)) c.clock.step() unsetA(nodes) c.reqQueueDeqValid.foreach(_.expect(false.B)) c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0x10.U) c.coalReqBits.data.expect(BigInt("4444000033330000222200001111", 16) << 128) c.coalReqBits.mask.expect(0xffff0000L) c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) c.coalReqReady.expect(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 1, size = 2, source = 1, addr = 0xf20, mask = 0xf, data = 0x5555) pokeA(nodes, idx = 1, op = 1, size = 2, source = 1, addr = 0xf24, mask = 0xf, data = 0x6666, valid = false) pokeA(nodes, idx = 2, op = 1, size = 2, source = 1, addr = 0xf28, mask = 0xf, data = 0x7777) pokeA(nodes, idx = 3, op = 1, size = 2, source = 1, addr = 0xf2c, mask = 0xf, data = 0x8888, valid = false) c.clock.step() c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0xf20.U) c.coalReqBits.data.expect(BigInt("77770000000000005555", 16)) // technically these can be dontcare's c.coalReqBits.mask.expect(0x00000f0f) c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) c.coalReqReady.expect(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 0, size = 2, source = 2, addr = 0xd04, mask = 0xa, data = 0xdeadbeefL) pokeA(nodes, idx = 1, op = 0, size = 2, source = 2, addr = 0xd0c, mask = 0xb, data = 0x8badf00dL) pokeA(nodes, idx = 2, op = 0, size = 2, source = 2, addr = 0xd14, mask = 0xc, data = 0xcafeb0baL) pokeA(nodes, idx = 3, op = 0, size = 2, source = 2, addr = 0xd1c, mask = 0xd, data = 0xdabbad00L) c.clock.step() c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0xd00.U) c.coalReqBits.size.expect(5.U) c.coalReqBits.data.expect(BigInt("dabbad0000000000cafeb0ba000000008badf00d00000000deadbeef00000000", 16)) c.coalReqBits.mask.expect(0xd0c0b0a0L) c.coalReqBits.op.expect(0.U) c.clock.step() // c.clock.step() } } it should "coalesce identical addresses (stride of 0)" in { test(LazyModule(new DummyCoalescingUnitTB()(new WithoutTLMonitors())).module) .withAnnotations(Seq(VerilatorBackendAnnotation)) { c => println(s"coalIO length = ${c.coalIOs(0).length}") val nodes = c.coalIOs.map(_.head) c.l2IOs.foreach(_.head.a.ready.poke(true.B)) c.coalReqReady.expect(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x1111) pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x2222) pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333) pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x4444) c.clock.step() unsetA(nodes) c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0x10.U) c.coalReqBits.data.expect(BigInt("11110000000000000000", 16) << 128) // could be any of the 4 reqs c.coalReqBits.mask.expect(0x0f000000) c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) c.clock.step() } } it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in { test(LazyModule(new DummyCoalescingUnitTB()).module) // .withAnnotations(Seq(VcsBackendAnnotation)) { c => .withAnnotations(Seq(VerilatorBackendAnnotation)) { c => println(s"coalIO length = ${c.coalIOs(0).length}") val nodes = c.coalIOs.map(_.head) c.l2IOs.foreach(_.head.a.ready.poke(true.B)) c.coalReqReady.expect(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x04, mask = 0xf, data = 0x1111) pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x08, mask = 0xf, data = 0x2222) pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0xf00, mask = 0xf, data = 0x3333) pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0xd00, mask = 0xf, data = 0x4444) c.clock.step() unsetA(nodes) c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0x00.U) c.coalReqBits.data.expect(BigInt("22220000111100000000", 16)) c.coalReqBits.mask.expect(0x00000ff0) c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) expectVec(c.reqQueueDeqValid, Seq(false.B, false.B, true.B, true.B)) expectVec(c.reqQueueDeqBits.map(_.data), Seq(0x1111.U, 0x2222.U, 0x3333.U, 0x4444.U)) expectVec(c.reqQueueDeqReady, Seq.fill(4)(true.B)) c.clock.step() expectVec(c.reqQueueDeqValid, Seq.fill(4)(false.B)) c.coalReqValid.expect(false.B) } } // it should "not touch uncoalescable requests" in {} // // it should "allow temporal coalescing when depth >=2" in {} // // it should "select the most coverage mono-coalescer" in {} // // it should "resort to the backup policy when coverage is below average" in {} } class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "request shift queues" def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { ((c.io.queue.enq zip bits) zip valids).foreach { case ((enq, ent), valid) => enq.ready.expect(true.B) enq.valid.poke(valid) enq.bits.poke(ent) } c.clock.step() } def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = { ((c.io.queue.deq zip bits) zip valids).foreach { case ((deq, ent), valid) => deq.valid.expect(valid) deq.bits.expect(ent) } } def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = { (vec zip value).foreach { case (a, b) => a.poke(b) } } it should "work like normal shiftqueue when no invalidate" in { test(new CoalShiftQueue(UInt(8.W),4, testConfig)) { c => c.io.coalescable.foreach(_.poke(true.B)) c.io.queue.deq.foreach(_.ready.poke(false.B)) attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(2.U), Seq(true.B, false.B, false.B, false.B)) // should remain synchronous attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) c.io.queue.enq.foreach(_.valid.poke(false.B)) c.io.queue.enq.foreach(_.ready.expect(true.B)) // check if head is the first enqueued item expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(false.B)) c.clock.step() c.io.queue.deq.foreach(_.ready.poke(true.B)) // should not dequeue because all are coalescable expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(false.B)) c.clock.step() pokeVec(c.io.coalescable, Seq(false.B, false.B, false.B, true.B)) // first 3 items should be valid now expectDequeue(c, Seq.fill(4)(1.U), Seq(true.B, true.B, true.B, false.B)) // only dequeue first item - 4th item should not be dequeued since not valid pokeVec(c.io.queue.deq.map(_.ready), Seq(true.B, false.B, false.B, true.B)) c.clock.step() // first item should turn invalid c.io.coalescable.foreach(_.poke(false.B)) expectDequeue(c, Seq.fill(4)(1.U), Seq(false.B, true.B, true.B, true.B)) // now dequeue everything else in the first line c.io.queue.deq.foreach(_.ready.poke(true.B)) c.clock.step() // all dequeued && shifted last cycle c.io.coalescable.foreach(_.poke(false.B)) c.io.queue.deq.foreach(_.ready.poke(true.B)) expectDequeue(c, Seq.fill(4)(2.U), Seq(true.B, false.B, false.B, false.B)) c.clock.step() pokeVec(c.io.coalescable, Seq(true.B, false.B, true.B, true.B)) expectDequeue(c, Seq.fill(4)(3.U), Seq(false.B, true.B, false.B, false.B)) c.clock.step() c.io.coalescable.foreach(_.poke(false.B)) expectDequeue(c, Seq.fill(4)(3.U), Seq(true.B, false.B, true.B, true.B)) c.clock.step() // empty expectDequeue(c, Seq.fill(4)(0.U), Seq.fill(4)(false.B)) // now enqueue back to full & test back pressure c.io.queue.deq.foreach(_.ready.poke(false.B)) attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) pokeVec(c.io.coalescable, Seq(true.B, true.B, true.B, true.B)) attemptEnqueue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B)) // check full c.io.queue.enq.foreach(_.ready.expect(false.B)) c.clock.step() // now indicate this cycle will dequeue everything c.io.queue.deq.foreach(_.ready.poke(true.B)) // should still be full, but allow enqueue c.io.coalescable.foreach(_.poke(true.B)) c.io.queue.enq.foreach(_.ready.expect(false.B)) // check full c.io.coalescable.foreach(_.poke(false.B)) attemptEnqueue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) c.clock.step() attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B)) } } it should "work when enqueing and dequeueing simultaneously" in { test(new CoalShiftQueue(UInt(8.W), 4, testConfig)) { c => c.io.invalidate.valid.poke(false.B) c.io.coalescable.foreach(_.poke(true.B)) c.io.queue.deq.foreach(_.ready.poke(false.B)) attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) // mark for dequeue c.io.coalescable.foreach(_.poke(false.B)) c.io.queue.deq.foreach(_.ready.poke(true.B)) expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B)) expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) // disable dequeue c.io.queue.deq.foreach(_.ready.poke(false.B)) expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(7.U), Seq.fill(4)(true.B)) attemptEnqueue(c, Seq.fill(4)(8.U), Seq.fill(4)(true.B)) c.io.queue.enq.foreach(_.ready.expect(false.B)) c.io.queue.deq.foreach(_.ready.poke(true.B)) expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B)) c.clock.step() expectDequeue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B)) c.clock.step() expectDequeue(c, Seq.fill(4)(7.U), Seq.fill(4)(true.B)) c.clock.step() expectDequeue(c, Seq.fill(4)(8.U), Seq.fill(4)(true.B)) c.clock.step() } } /* it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => c.io.invalidate.valid.poke(false.B) c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(true.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // enqueue and dequeue simultaneously c.io.queue.deq.ready.poke(true.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x34.U) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x12.U) c.clock.step() // enqueue and dequeue simultaneously once more c.io.queue.deq.ready.poke(true.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x56.U) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x34.U) c.clock.step() // dequeueing back-to-back should work without any holes in the middle c.io.queue.deq.ready.poke(true.B) c.io.queue.enq.valid.poke(false.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x56.U) c.clock.step() // make sure is empty c.io.queue.deq.ready.poke(true.B) c.io.queue.enq.valid.poke(false.B) c.io.queue.deq.valid.expect(false.B) } } it should "work when invalidating and enqueueing to a depth=1 queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => c.io.invalidate.valid.poke(false.B) c.io.allowShift.poke(true.B) // no dequeueing c.io.queue.deq.ready.poke(false.B) // prepare c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // invalidate, but don't allow shift c.io.allowShift.poke(false.B) c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x1.U) // TODO: we might be able to enqueue to a full depth=1 queue whose only // entry just got invalidated, so that enq.ready is true here, but // it is a niche case c.io.queue.enq.ready.expect(false.B) c.clock.step() // now try enqueueing now that we have space c.io.allowShift.poke(true.B) c.io.invalidate.valid.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x34.U) c.io.queue.deq.valid.expect(false.B) c.clock.step() // see if it comes out right next cycle c.io.queue.enq.valid.poke(false.B) c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x34.U) } } it should "invalidate head that is also being dequeued" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x12.U) c.clock.step() c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x34.U) c.clock.step() c.io.queue.enq.valid.poke(false.B) // invalidate should work for the head just being dequeued at the same // cycle c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x1.U) c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(false.B) c.clock.step() // 0x12 should have been dequeued c.io.invalidate.valid.poke(false.B) c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x34.U) } } it should "dequeue invalidated head on its own when allowShift" in { test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c => c.io.invalidate.valid.poke(false.B) c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x12.U) c.clock.step() c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x34.U) c.clock.step() c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x56.U) c.clock.step() c.io.queue.enq.valid.poke(false.B) // invalidate two entries at head c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x3.U) c.io.queue.deq.ready.poke(false.B) // [ 0x56 | 0x34(inv) | 0x12(inv) ] c.clock.step() // [ 0x56 | 0x34(inv) ] c.io.invalidate.valid.poke(false.B) c.io.queue.deq.ready.poke(false.B) c.clock.step() // [ 0x56 ] c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x56.U) c.clock.step() c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(false.B) c.clock.step() // do one more enqueue-then-dequeue to see if used bit was properly cleared c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x78.U) c.clock.step() // should dequeue right away c.io.queue.enq.valid.poke(false.B) c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x78.U) } } it should "overwrite invalidated tail when enqueuing" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) c.io.invalidate.bits.poke(0.U) c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x12.U) c.clock.step() // invalidate and enqueue at the tail at the same time c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x1.U) c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) c.io.queue.enq.bits.poke(0x34.U) c.clock.step() c.io.invalidate.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B) // now should be able to dequeue immediately as tail is overwritten c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x34) } }*/ } class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "uncoalescer" object uncoalescerTestConfig extends CoalescerConfig( enable = true, numLanes = 4, queueDepth = 2, waitTimeout = 8, addressWidth = 24, dataBusWidth = 4, // 128 bit data bus wordSizeInBytes = 4, numOldSrcIds = 16, numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(4), sizeEnum = DefaultInFlightTableSizeEnum, numCoalReqs = 1, numArbiterOutputPorts = 4, bankStrideInBytes = 64, ) val config = uncoalescerTestConfig val nonCoalReqT = new NonCoalescedRequest(config) val coalReqT = new CoalescedRequest(config) it should "work in general case" in { test(new Uncoalescer(config, nonCoalReqT, coalReqT)) // vcs helps with simulation time, but sometimes errors with // "mutation occurred during iteration" java error // .withAnnotations(Seq(VcsBackendAnnotation)) { c => // 4 lanes, queue depth 2 c.io.windowElts(0)(0).op.poke(0.U) c.io.windowElts(0)(0).source.poke(1.U) c.io.windowElts(0)(0).address.poke(0x4.U) c.io.windowElts(0)(0).size.poke(2.U) c.io.windowElts(0)(1).op.poke(0.U) c.io.windowElts(0)(1).source.poke(2.U) c.io.windowElts(0)(1).address.poke(0x4.U) // two reqs from one lane c.io.windowElts(0)(1).size.poke(2.U) c.io.windowElts(2)(0).op.poke(0.U) c.io.windowElts(2)(0).source.poke(2.U) c.io.windowElts(2)(0).address.poke(0x8.U) c.io.windowElts(2)(0).size.poke(2.U) c.io.windowElts(2)(1).op.poke(0.U) c.io.windowElts(2)(1).source.poke(2.U) c.io.windowElts(2)(1).address.poke(0xc.U) c.io.windowElts(2)(1).size.poke(2.U) // indicate lane 0 and 2 are used for coalescing c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits(0).poke(0x3.U) // 2'b11 for depth=2 c.io.invalidate.bits(1).poke(0x0.U) c.io.invalidate.bits(2).poke(0x3.U) c.io.invalidate.bits(3).poke(0x0.U) val sourceId = 0.U c.io.coalReq.valid.poke(true.B) c.io.coalReq.bits.source.poke(sourceId) c.io.coalReq.ready.expect(true.B) c.clock.step() c.io.coalReq.valid.poke(false.B) c.io.invalidate.valid.poke(false.B) c.clock.step() c.io.coalResp.valid.poke(true.B) c.io.coalResp.bits.source.poke(sourceId) val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) // val lit = BigInt(0x0123456789abcdefL) c.io.coalResp.bits.data.poke(lit.U) // table lookup is combinational at the same cycle c.io.uncoalResps(0)(0).valid.expect(true.B) c.io.uncoalResps(1)(0).valid.expect(false.B) c.io.uncoalResps(2)(0).valid.expect(true.B) c.io.uncoalResps(3)(0).valid.expect(false.B) // offset is counting from LSB c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(0).bits.source.expect(1.U) c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(1).bits.source.expect(2.U) c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) c.io.uncoalResps(2)(0).bits.source.expect(2.U) c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U) c.io.uncoalResps(2)(1).bits.source.expect(2.U) } } it should "uncoalesce when coalesced to the same word offset" in { test(new Uncoalescer(config, nonCoalReqT, coalReqT)) // .withAnnotations(Seq(VcsBackendAnnotation)) { c => // 4 lanes, queue depth 2 c.io.windowElts(0)(0).op.poke(0.U) c.io.windowElts(0)(0).source.poke(0.U) c.io.windowElts(0)(0).address.poke(0x4.U) c.io.windowElts(0)(0).size.poke(2.U) c.io.windowElts(1)(0).op.poke(0.U) c.io.windowElts(1)(0).source.poke(1.U) c.io.windowElts(1)(0).address.poke(0x4.U) // two reqs from one lane c.io.windowElts(1)(0).size.poke(2.U) c.io.windowElts(2)(0).op.poke(0.U) c.io.windowElts(2)(0).source.poke(2.U) c.io.windowElts(2)(0).address.poke(0x4.U) c.io.windowElts(2)(0).size.poke(2.U) c.io.windowElts(3)(0).op.poke(0.U) c.io.windowElts(3)(0).source.poke(3.U) c.io.windowElts(3)(0).address.poke(0x4.U) c.io.windowElts(3)(0).size.poke(2.U) // indicate lanes used for coalescing c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits(0).poke(0x1.U) // 2'b01 for enabling head c.io.invalidate.bits(1).poke(0x1.U) c.io.invalidate.bits(2).poke(0x1.U) c.io.invalidate.bits(3).poke(0x1.U) val sourceId = 0.U c.io.coalReq.valid.poke(true.B) c.io.coalReq.bits.source.poke(sourceId) c.io.coalReq.ready.expect(true.B) c.clock.step() c.io.coalReq.valid.poke(false.B) c.io.invalidate.valid.poke(false.B) c.clock.step() c.io.coalResp.valid.poke(true.B) c.io.coalResp.bits.source.poke(sourceId) val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) c.io.coalResp.bits.data.poke(lit.U) // table lookup is combinational at the same cycle // offset is counting from LSB c.io.uncoalResps(0)(0).valid.expect(true.B) c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(0).bits.source.expect(0.U) c.io.uncoalResps(0)(1).valid.expect(false.B) c.io.uncoalResps(1)(0).valid.expect(true.B) c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(1)(0).bits.source.expect(1.U) c.io.uncoalResps(1)(1).valid.expect(false.B) c.io.uncoalResps(2)(0).valid.expect(true.B) c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(2)(0).bits.source.expect(2.U) c.io.uncoalResps(2)(1).valid.expect(false.B) c.io.uncoalResps(3)(0).valid.expect(true.B) c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(3)(0).bits.source.expect(3.U) c.io.uncoalResps(3)(1).valid.expect(false.B) } } }