diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala
index 8c572ac..fda9865 100644
--- a/src/main/scala/tilelink/Coalescing.scala
+++ b/src/main/scala/tilelink/Coalescing.scala
@@ -458,46 +458,63 @@ class InflightCoalReqTableEntry(
 
 // Mostly copied from freechips.rocketchip.util.ShiftQueue, except that every
 // queue entry and its valid signal are exposed as output IO.
-class CoalShiftQueue[T <: Data](gen: T,
-                                val entries: Int,
-                                pipe: Boolean = false,
-                                flow: Boolean = false)
-    extends Module {
+class CoalShiftQueue[T <: Data](
+    gen: T,
+    val entries: Int,
+    pipe: Boolean = false,
+    flow: Boolean = false
+) extends Module {
   val io = IO(new QueueIO(gen, entries) {
+    val invalidate = Input(UInt(entries.W))
     val mask = Output(UInt(entries.W))
     val elts = Output(Vec(entries, gen))
   })
 
   private val valid = RegInit(VecInit(Seq.fill(entries) { false.B }))
+  // One-hot tail pointer: bit n is set when n entries are stored, so it needs entries + 1 bits.
+  // Kept explicitly because holes in the middle mean invalid slots alone cannot locate the tail.
+  private val wptr = RegInit(UInt((entries + 1).W), 1.U)
   private val elts = Reg(Vec(entries, gen))
 
   for (i <- 0 until entries) {
     def paddedValid(i: Int) = if (i == -1) true.B else if (i == entries) false.B else valid(i)
 
-    val wdata = if (i == entries-1) io.enq.bits else Mux(valid(i+1), elts(i+1), io.enq.bits)
-    val wen =
-      Mux(io.deq.ready,
-          paddedValid(i+1) || io.enq.fire() && ((i == 0 && !flow).B || valid(i)),
-          io.enq.fire() && paddedValid(i-1) && !valid(i))
-    when (wen) { elts(i) := wdata }
+    // A same-cycle dequeue shifts entries down, so the enqueue lands one slot below the tail.
+    val enqHere = Mux(io.deq.fire, wptr(i + 1), wptr(i))
+    val wdata = if (i == entries - 1) io.enq.bits else Mux(enqHere, io.enq.bits, elts(i + 1))
+    val wen = Mux(
+      io.deq.ready,
+      paddedValid(i + 1) || io.enq.fire && ((i == 0 && !flow).B || valid(i)),
+      io.enq.fire && paddedValid(i - 1) && !valid(i)
+    )
+    when(wen) { elts(i) := wdata }
 
-    valid(i) :=
-      Mux(io.deq.ready,
-          paddedValid(i+1) || io.enq.fire() && ((i == 0 && !flow).B || valid(i)),
-          io.enq.fire() && paddedValid(i-1) || valid(i))
+    valid(i) := Mux(
+      io.deq.ready,
+      paddedValid(i + 1) || io.enq.fire && ((i == 0 && !flow).B || valid(i)),
+      io.enq.fire && paddedValid(i - 1) || valid(i)
+    )
   }
 
-  io.enq.ready := !valid(entries-1)
+  when(io.enq.fire) {
+    when(!io.deq.fire) {
+      wptr := wptr << 1.U
+    }
+  }.elsewhen(io.deq.fire) {
+    wptr := wptr >> 1.U
+  }
+
+  io.enq.ready := !valid(entries - 1)
   io.deq.valid := valid(0)
   io.deq.bits := elts.head
 
   if (flow) {
-    when (io.enq.valid) { io.deq.valid := true.B }
-    when (!valid(0)) { io.deq.bits := io.enq.bits }
+    when(io.enq.valid) { io.deq.valid := true.B }
+    when(!valid(0)) { io.deq.bits := io.enq.bits }
   }
 
   if (pipe) {
-    when (io.deq.ready) { io.enq.ready := true.B }
+    when(io.deq.ready) { io.enq.ready := true.B }
   }
 
   io.mask := valid.asUInt
@@ -505,9 +522,13 @@ class CoalShiftQueue[T <: Data](gen: T,
   io.count := PopCount(io.mask)
 }
 
-object CoalShiftQueue
-{
-  def apply[T <: Data](enq: DecoupledIO[T], entries: Int = 2, pipe: Boolean = false, flow: Boolean = false): DecoupledIO[T] = {
+object CoalShiftQueue {
+  def apply[T <: Data](
+      enq: DecoupledIO[T],
+      entries: Int = 2,
+      pipe: Boolean = false,
+      flow: Boolean = false
+  ): DecoupledIO[T] = {
     val q = Module(new CoalShiftQueue(enq.bits.cloneType, entries, pipe, flow))
     q.io.enq <> enq
     q.io.deq
diff --git a/src/test/scala/CoalescingUnitTest.scala b/src/test/scala/CoalescingUnitTest.scala
index b65110a..8d50cd3 100644
--- a/src/test/scala/CoalescingUnitTest.scala
+++ b/src/test/scala/CoalescingUnitTest.scala
@@ -29,6 +29,58 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
   }
 }
 
+class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
+  behavior of "request queues"
+
+  it should "work like normal shiftqueue when no invalidate" in {
+    test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
+      c.io.deq.ready.poke(false.B)
+
+      c.io.enq.ready.expect(true.B)
+      c.io.enq.valid.poke(true.B)
+      c.io.enq.bits.poke(0x12.U)
+      c.clock.step()
+      c.io.enq.ready.expect(true.B)
+      c.io.enq.valid.poke(true.B)
+      c.io.enq.bits.poke(0x34.U)
+      c.clock.step()
+      c.io.enq.ready.expect(true.B)
+      c.io.enq.valid.poke(true.B)
+      c.io.enq.bits.poke(0x56.U)
+      c.clock.step()
+
+      c.io.enq.valid.poke(false.B)
+
+      c.io.deq.ready.poke(true.B)
+      c.io.deq.valid.expect(true.B)
+      c.io.deq.bits.expect(0x12.U)
+      c.clock.step()
+      c.io.deq.ready.poke(true.B)
+      c.io.deq.valid.expect(true.B)
+      c.io.deq.bits.expect(0x34.U)
+      c.clock.step()
+      // enqueue in the middle
+      c.io.deq.ready.poke(false.B)
+      c.io.enq.ready.expect(true.B)
+      c.io.enq.valid.poke(true.B)
+      c.io.enq.bits.poke(0x78.U)
+      c.clock.step()
+      c.io.enq.valid.poke(false.B)
+      c.io.deq.ready.poke(true.B)
+      c.io.deq.valid.expect(true.B)
+      c.io.deq.bits.expect(0x56.U)
+      c.clock.step()
+      c.io.deq.ready.poke(true.B)
+      c.io.deq.valid.expect(true.B)
+      c.io.deq.bits.expect(0x78.U)
+      c.clock.step()
+
+      // should be emptied
+      c.io.deq.valid.expect(false.B)
+    }
+  }
+}
+
 class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
   behavior of "uncoalescer"
   val numLanes = 4