diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 2aa87d4..a206f75 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -79,7 +79,7 @@ object defaultConfig extends CoalescerConfig( sizeEnum = DefaultInFlightTableSizeEnum, numCoalReq = 1, arbiterOutputs = 4, - bankStrideInBytes = 64 //Current L2 is strided by 512 bits + bankStrideInBytes = 64 // Current L2 is strided by 512 bits ) class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends LazyModule { @@ -236,7 +236,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e c && !(io.invalidate.valid && i) }.reduce(_ || _) val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _) - val syncedDeqValid = io.queue.deq.map(_.valid).reduce(_ || _) + val syncedDeqValid = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) // valid and not fire for (i <- 0 until config.numLanes) { val enq = io.queue.enq(i) @@ -312,7 +312,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], // number of entries matched with this leader lane's head. // maximum is numLanes * queueDepth val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) - val coverageHits = Output(UInt((1 << config.maxCoalLogSize).W)) + val coverageHits = Output(UInt((config.maxCoalLogSize - config.wordWidth + 1).W)) val canCoalesce = Output(Vec(config.numLanes, Bool())) }) }) @@ -394,7 +394,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth) // 2-D table flattened to 1-D val offsets = io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) - val valids = io.window.mask.flatMap(_.asBools) + val valids = chosenMatches.flatMap(_.asBools) // indicates for each word in the coalesced chunk whether it is accessed by // any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four // words in the coalesced data coming back will be accessed by some request @@ -454,7 +454,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE x.zipWithIndex.map { case (a, b) => (a, b.U) }.reduce[(UInt, UInt)] { case ((a, i), (b, j)) => - (Mux(a >= b, a, b), Mux(a >= b, i, j)) // TODO: tie-breaker + (Mux(a > b, a, b), Mux(a > b, i, j)) // > instead of >= here; want to use largest size }._2 } @@ -466,7 +466,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W)) val chosenValid = Wire(Bool()) // minimum 25% coverage - val minCoverage = 1.max(1 << ((config.maxCoalLogSize - 2) - 2)) + val minCoverage = 1.max(1 << ((config.maxCoalLogSize - config.wordWidth) - 2)) when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { chosenSizeIdx := argMax(normalizedHits) @@ -1787,11 +1787,12 @@ class CoalArbiter(config: CoalescerConfig) (implicit p: Parameters) extends Lazy log2Ceil(config.numOldSrcIds), config.wordWidth, log2Ceil(config.wordSizeInBytes) - ) + ) val respCoalBundleT = new CoalescedResponseBundle(config) - lazy val module = new CoalArbiterImpl(this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) + lazy val module = new CoalArbiterImpl( + this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) } @@ -1804,7 +1805,7 @@ class CoalArbiterImpl(outer: CoalArbiter, ) extends LazyModuleImp(outer){ - val io =IO(new Bundle { + val io = IO(new Bundle { val nonCoalVec = Vec(config.numLanes, Flipped(Decoupled(nonCoalEntryT.cloneType))) val coalVec = Vec(config.numCoalReq, Flipped(Decoupled(coalEntryT.cloneType))) val respNonCoalVec = Vec(config.numLanes, Decoupled(respNonCoalEntryT.cloneType)) diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 4d6c346..3ba7846 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -97,6 +97,8 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI val coalIO3 = outer.cpuNodes(3).makeIOs() val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3) + val l2IO0 = outer.l2Nodes(0).makeIOs() + // val coalMasterNode = coal.coalescerNode.makeIOs() private val reqQueues = coal.module.reqQueues @@ -183,7 +185,9 @@ object testConfig extends CoalescerConfig( respQueueDepth = 4, coalLogSizes = Seq(4, 5), sizeEnum = DefaultInFlightTableSizeEnum, - arbiterOutputs = 4 + arbiterOutputs = 4, + numCoalReq = 1, + bankStrideInBytes = 64 ) class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { @@ -193,7 +197,7 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { def pokeA( nodes: Seq[TLBundle], - idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int, + idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Long, valid: Boolean = true, ): Unit = { val node = nodes(idx) @@ -221,11 +225,13 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { it should "coalesce fully consecutive accesses at size 4, only once" in { test(LazyModule(new DummyCoalescingUnitTB()).module) - .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) + .withAnnotations(Seq(VerilatorBackendAnnotation, WriteFstAnnotation)) +// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c => println(s"coalIO length = ${c.coalIOs(0).length}") val nodes = c.coalIOs.map(_.head) // TODO: this doesn't work + c.l2IO0.head.a.ready.poke(true.B) // c.coalMasterNode.head.a.ready.poke(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) @@ -246,7 +252,7 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) -// c.coalReqReady.expect(true.B) + c.coalReqReady.expect(true.B) c.reqQueueEnqReady.foreach(_.expect(true.B)) pokeA(nodes, idx = 0, op = 1, size = 2, source = 1, addr = 0xf20, mask = 0xf, data = 0x5555) pokeA(nodes, idx = 1, op = 1, size = 2, source = 1, addr = 0xf24, mask = 0xf, data = 0x6666, valid = false) @@ -257,12 +263,27 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { c.coalReqValid.expect(true.B) c.coalReqBits.address.expect(0xf20.U) c.coalReqBits.data.expect(BigInt("77770000000000005555", 16)) // technically these can be dontcare's - c.coalReqBits.mask.expect(0x0000ffff) + c.coalReqBits.mask.expect(0x00000f0f) c.coalReqBits.size.expect(4.U) c.coalReqBits.op.expect(1.U) + c.coalReqReady.expect(true.B) + c.reqQueueEnqReady.foreach(_.expect(true.B)) + pokeA(nodes, idx = 0, op = 0, size = 2, source = 2, addr = 0xd04, mask = 0xa, data = 0xdeadbeefL) + pokeA(nodes, idx = 1, op = 0, size = 2, source = 2, addr = 0xd0c, mask = 0xb, data = 0x8badf00dL) + pokeA(nodes, idx = 2, op = 0, size = 2, source = 2, addr = 0xd14, mask = 0xc, data = 0xcafeb0baL) + pokeA(nodes, idx = 3, op = 0, size = 2, source = 2, addr = 0xd1c, mask = 0xd, data = 0xdabbad00L) c.clock.step() + + c.coalReqValid.expect(true.B) + c.coalReqBits.address.expect(0xd00.U) + c.coalReqBits.size.expect(5.U) + c.coalReqBits.data.expect(BigInt("dabbad0000000000cafeb0ba000000008badf00d00000000deadbeef00000000", 16)) + c.coalReqBits.mask.expect(0xd0c0b0a0L) + c.coalReqBits.op.expect(0.U) + c.clock.step() +// c.clock.step() } } @@ -661,7 +682,9 @@ object uncoalescerTestConfig extends CoalescerConfig( respQueueDepth = 4, coalLogSizes = Seq(4), sizeEnum = DefaultInFlightTableSizeEnum, - arbiterOutputs = 4 + numCoalReq = 1, + arbiterOutputs = 4, + bankStrideInBytes = 64, ) class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {