Create custom response bundle to decouple from TileLink
... and easier unit testing.
This commit is contained in:
@@ -327,7 +327,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
tlCoal.a.bits := coalescer.io.out_req.bits.toTLA(edgeCoal)
|
||||
tlCoal.b.ready := true.B
|
||||
tlCoal.c.valid := false.B
|
||||
tlCoal.d.ready := true.B
|
||||
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
||||
tlCoal.e.valid := false.B
|
||||
|
||||
|
||||
@@ -475,13 +475,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
dontTouch(newEntry)
|
||||
|
||||
// Uncoalescer module uncoalesces responses back to each lane
|
||||
val uncoalescer = Module(new UncoalescingUnit(config, tlCoal.d.bits.cloneType))
|
||||
val uncoalescer = Module(new UncoalescingUnit(config))
|
||||
|
||||
uncoalescer.io.coalReqValid := coalescer.io.out_req.valid
|
||||
uncoalescer.io.newEntry := newEntry
|
||||
// richard: I changed this to use the DecoupledIO interface, which TL is using,
|
||||
// previously we were not handling the ready signal
|
||||
uncoalescer.io.coalResp <> tlCoal.d
|
||||
// Cleanup: custom <>?
|
||||
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
||||
uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source
|
||||
uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data
|
||||
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
||||
|
||||
// Queue up synthesized uncoalesced responses into each lane's response queue
|
||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
||||
@@ -507,7 +509,19 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
dontTouch(tlCoal.d)
|
||||
}
|
||||
|
||||
class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Module {
|
||||
// Protocol-agnostic bundle that represents a coalesced response.
|
||||
//
|
||||
// Having this makes it easier to:
|
||||
// * do unit tests -- no need to deal with TileLink in the chiseltest code
|
||||
// * adapt coalescer to custom protocols like a custom L1 cache interface.
|
||||
//
|
||||
// FIXME: overlaps with RespQueueEntry. Trait-ify
|
||||
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
|
||||
val source = UInt(log2Ceil(config.NUM_NEW_IDS).W)
|
||||
val data = UInt((8 * (1 << config.MAX_SIZE)).W)
|
||||
}
|
||||
|
||||
class UncoalescingUnit(config: CoalescerConfig) extends Module {
|
||||
// notes to hansung:
|
||||
// val numLanes: Int, <-> config.NUM_LANES
|
||||
// val numPerLaneReqs: Int, <-> config.DEPTH
|
||||
@@ -518,8 +532,9 @@ class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Modu
|
||||
val inflightTable = Module(new InflightCoalReqTable(config))
|
||||
val io = IO(new Bundle {
|
||||
val coalReqValid = Input(Bool())
|
||||
// FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer
|
||||
val newEntry = Input(inflightTable.entryT.cloneType)
|
||||
val coalResp = Flipped(Decoupled(tlCoalD))
|
||||
val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config)))
|
||||
val uncoalResps = Output(
|
||||
Vec(
|
||||
config.NUM_LANES,
|
||||
@@ -1239,7 +1254,7 @@ class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters)
|
||||
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
||||
// TODO: use parameters for numLanes
|
||||
val numLanes = 4
|
||||
val filename = "test.trace"
|
||||
val filename = "vecadd.core1.thread4.trace"
|
||||
val coal = LazyModule(new CoalescingUnit(defaultConfig))
|
||||
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
|
||||
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
|
||||
|
||||
@@ -36,216 +36,220 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
it should "work like normal shiftqueue when no invalidate" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.clock.step()
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x56.U)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x56.U)
|
||||
c.clock.step()
|
||||
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x12.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x12.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x34.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
c.clock.step()
|
||||
// enqueue in the middle
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x78.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x78.U)
|
||||
c.clock.step()
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x56.U)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x56.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x78.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x78.U)
|
||||
c.clock.step()
|
||||
|
||||
// should be emptied
|
||||
c.io.deq.valid.expect(false.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
// prepare
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
// enqueue and dequeue simultaneously
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x12.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x12.U)
|
||||
c.clock.step()
|
||||
// dequeueing back-to-back should work without any holes in the middle
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x34.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
c.clock.step()
|
||||
// make sure is empty
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.deq.valid.expect(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
// prepare
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
// enqueue and dequeue simultaneously
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x12.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x12.U)
|
||||
c.clock.step()
|
||||
// enqueue and dequeue simultaneously once more
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x56.U)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x34.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x56.U)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
c.clock.step()
|
||||
// dequeueing back-to-back should work without any holes in the middle
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x56.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x56.U)
|
||||
c.clock.step()
|
||||
// make sure is empty
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.deq.valid.expect(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "invalidate head being dequeued" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
// prepare
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.clock.step()
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
|
||||
// invalidate should work for the head just being dequeued at the same
|
||||
// cycle. However, it should not change deq.valid right away to avoid
|
||||
// combinational cycles (see definition).
|
||||
c.io.invalidate.poke(0x1.U)
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x1.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.clock.step()
|
||||
// 0x12 should have been dequeued
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x34.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
}
|
||||
}
|
||||
|
||||
it should "dequeue invalidated entries by itself" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
// prepare
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x56.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x56.U)
|
||||
c.clock.step()
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
|
||||
// invalidate two entries at head
|
||||
c.io.invalidate.poke(0x3.U)
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x3.U)
|
||||
c.clock.step()
|
||||
// 0x12 should have been dequeued now
|
||||
c.io.invalidate.poke(0x0.U)
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.clock.step()
|
||||
// 0x34 should have been dequeued now
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x56.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x56.U)
|
||||
c.clock.step()
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "overwrite invalidated tail when enqueuing" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.poke(0.U)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.invalidate.bits.poke(0.U)
|
||||
|
||||
// prepare
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x12.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
// invalidate and enqueue at the tail at the same time
|
||||
c.io.invalidate.poke(0x1.U)
|
||||
c.io.deq.ready.poke(false.B)
|
||||
c.io.enq.ready.expect(true.B)
|
||||
c.io.enq.valid.poke(true.B)
|
||||
c.io.enq.bits.poke(0x34.U)
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x1.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.clock.step()
|
||||
c.io.invalidate.poke(0x0.U)
|
||||
c.io.enq.valid.poke(false.B)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
// now should be able to dequeue immediately as tail is overwritten
|
||||
c.io.deq.ready.poke(true.B)
|
||||
c.io.deq.valid.expect(true.B)
|
||||
c.io.deq.bits.expect(0x34)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -260,15 +264,11 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
val coalDataWidth = 128
|
||||
val numInflightCoalRequests = 4
|
||||
|
||||
val hey = new TLBundleParameters(64, 64, 64, 64, 64)
|
||||
it should "work" in {
|
||||
test(
|
||||
new UncoalescingUnit(
|
||||
numLanes,
|
||||
numPerLaneReqs,
|
||||
sourceWidth,
|
||||
sizeWidth,
|
||||
coalDataWidth,
|
||||
numInflightCoalRequests,
|
||||
defaultConfig,
|
||||
)
|
||||
)
|
||||
// vcs helps with simulation time, but sometimes errors with
|
||||
|
||||
Reference in New Issue
Block a user