diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 135dfa0..0cb7c91 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -153,10 +153,14 @@ class ReqSourceGen(sourceWidth: Int) extends Module { // A shift-register queue implementation that supports invalidating entries // and exposing queue contents as output IO. (TODO: support deadline) // Initially copied from freechips.rocketchip.util.ShiftQueue. -// If `pipe` is true, support enqueueing to a full queue when also dequeueing. +// The queue only shifts down when `allowShift` is asserted. Dequeueing +// works normally, but if `allowShift` is false, the queue head will stay +// invalid after dequeueing. This option exists to synchronize the +// shifting of the queues between lanes, modeling SIMD behavior. +// If `pipe` is true, support enqueueing to a full queue when the head is +// being dequeued at the next cycle. // Software model: window.py -class CoalShiftQueue[T <: Data]( - gen: T, +class CoalShiftQueue[T <: Data]( gen: T, val entries: Int, pipe: Boolean = true, flow: Boolean = false @@ -164,6 +168,7 @@ class CoalShiftQueue[T <: Data]( val io = IO(new Bundle { val queue = new QueueIO(gen, entries) val invalidate = Input(Valid(UInt(entries.W))) + val allowShift = Input(Bool()) val mask = Output(UInt(entries.W)) val elts = Output(Vec(entries, gen)) // 'QueueIO' provides io.count, but we might not want to use it in the @@ -192,7 +197,7 @@ class CoalShiftQueue[T <: Data]( def paddedUsed = pad({ i: Int => used(i) }) def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i)) - val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0)) + val shift = io.allowShift && (used =/= 0.U) && (io.queue.deq.fire || !validAfterInv(0)) for (i <- 0 until entries) { val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1)) val 
wen = Mux( @@ -540,12 +545,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready") reqQueue.io.queue.enq.valid := tlIn.a.valid reqQueue.io.queue.enq.bits := req - // TODO: deq.ready should respect downstream ready + // TODO: deq.ready should respect downstream arbiter reqQueue.io.queue.deq.ready := true.B // invalidate queue entries that contain original core requests that got // coalesced into a wider one reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane) reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid + reqQueue.io.allowShift := true.B // NOTE: this relies on CoalShiftQueue's behavior combinationally // deasserting deq.valid in the same cycle that the head invalidate diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 7469e59..66f6b9f 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -224,6 +224,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work like normal shiftqueue when no invalidate" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.queue.deq.ready.poke(false.B) + c.io.allowShift.poke(true.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) @@ -272,6 +273,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work when enqueing and dequeueing simultaneously" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(true.B) @@ -303,6 +305,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work when enqueing and dequeueing simultaneously to a full queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => c.io.invalidate.valid.poke(false.B) + 
c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(true.B) @@ -342,6 +345,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "invalidate head being dequeued" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) @@ -374,6 +378,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "dequeue invalidated entries by itself" in { test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) @@ -414,6 +419,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) c.io.invalidate.bits.poke(0.U) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B)