Create custom response bundle to decouple from TileLink

... and easier unit testing.
This commit is contained in:
Hansung Kim
2023-04-23 13:37:10 -07:00
parent 4d16ec9b08
commit ccf9b95fb5
2 changed files with 154 additions and 139 deletions

View File

@@ -327,7 +327,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
tlCoal.a.bits := coalescer.io.out_req.bits.toTLA(edgeCoal) tlCoal.a.bits := coalescer.io.out_req.bits.toTLA(edgeCoal)
tlCoal.b.ready := true.B tlCoal.b.ready := true.B
tlCoal.c.valid := false.B tlCoal.c.valid := false.B
tlCoal.d.ready := true.B // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
tlCoal.e.valid := false.B tlCoal.e.valid := false.B
@@ -475,13 +475,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(newEntry) dontTouch(newEntry)
// Uncoalescer module uncoalesces responses back to each lane // Uncoalescer module uncoalesces responses back to each lane
val uncoalescer = Module(new UncoalescingUnit(config, tlCoal.d.bits.cloneType)) val uncoalescer = Module(new UncoalescingUnit(config))
uncoalescer.io.coalReqValid := coalescer.io.out_req.valid uncoalescer.io.coalReqValid := coalescer.io.out_req.valid
uncoalescer.io.newEntry := newEntry uncoalescer.io.newEntry := newEntry
// richard: I changed this to use the DecoupledIO interface, which TL is using, // Cleanup: custom <>?
// previously we were not handling the ready signal uncoalescer.io.coalResp.valid := tlCoal.d.valid
uncoalescer.io.coalResp <> tlCoal.d uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source
uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data
tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Queue up synthesized uncoalesced responses into each lane's response queue // Queue up synthesized uncoalesced responses into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) => (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
@@ -507,7 +509,19 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(tlCoal.d) dontTouch(tlCoal.d)
} }
class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Module { // Protocol-agnostic bundle that represents a coalesced response.
//
// Having this makes it easier to:
// * do unit tests -- no need to deal with TileLink in the chiseltest code
// * adapt coalescer to custom protocols like a custom L1 cache interface.
//
// FIXME: overlaps with RespQueueEntry. Trait-ify
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
val source = UInt(log2Ceil(config.NUM_NEW_IDS).W)
val data = UInt((8 * (1 << config.MAX_SIZE)).W)
}
class UncoalescingUnit(config: CoalescerConfig) extends Module {
// notes to hansung: // notes to hansung:
// val numLanes: Int, <-> config.NUM_LANES // val numLanes: Int, <-> config.NUM_LANES
// val numPerLaneReqs: Int, <-> config.DEPTH // val numPerLaneReqs: Int, <-> config.DEPTH
@@ -518,8 +532,9 @@ class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Modu
val inflightTable = Module(new InflightCoalReqTable(config)) val inflightTable = Module(new InflightCoalReqTable(config))
val io = IO(new Bundle { val io = IO(new Bundle {
val coalReqValid = Input(Bool()) val coalReqValid = Input(Bool())
// FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer
val newEntry = Input(inflightTable.entryT.cloneType) val newEntry = Input(inflightTable.entryT.cloneType)
val coalResp = Flipped(Decoupled(tlCoalD)) val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config)))
val uncoalResps = Output( val uncoalResps = Output(
Vec( Vec(
config.NUM_LANES, config.NUM_LANES,
@@ -1239,7 +1254,7 @@ class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters)
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
// TODO: use parameters for numLanes // TODO: use parameters for numLanes
val numLanes = 4 val numLanes = 4
val filename = "test.trace" val filename = "vecadd.core1.thread4.trace"
val coal = LazyModule(new CoalescingUnit(defaultConfig)) val coal = LazyModule(new CoalescingUnit(defaultConfig))
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename)) val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge

View File

@@ -36,216 +36,220 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
it should "work like normal shiftqueue when no invalidate" in { it should "work like normal shiftqueue when no invalidate" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c => test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.bits.poke(0x34.U)
c.clock.step() c.clock.step()
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U) c.io.queue.enq.bits.poke(0x56.U)
c.clock.step() c.clock.step()
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U) c.io.queue.deq.bits.expect(0x12.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U) c.io.queue.deq.bits.expect(0x34.U)
c.clock.step() c.clock.step()
// enqueue in the middle // enqueue in the middle
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x78.U) c.io.queue.enq.bits.poke(0x78.U)
c.clock.step() c.clock.step()
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U) c.io.queue.deq.bits.expect(0x56.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x78.U) c.io.queue.deq.bits.expect(0x78.U)
c.clock.step() c.clock.step()
// should be emptied // should be emptied
c.io.deq.valid.expect(false.B) c.io.queue.deq.valid.expect(false.B)
} }
} }
it should "work when enqueing and dequeueing simultaneously" in { it should "work when enqueing and dequeueing simultaneously" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c => test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
// prepare // prepare
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
// enqueue and dequeue simultaneously // enqueue and dequeue simultaneously
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.bits.poke(0x34.U)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U) c.io.queue.deq.bits.expect(0x12.U)
c.clock.step() c.clock.step()
// dequeueing back-to-back should work without any holes in the middle // dequeueing back-to-back should work without any holes in the middle
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U) c.io.queue.deq.bits.expect(0x34.U)
c.clock.step() c.clock.step()
// make sure is empty // make sure is empty
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.valid.expect(false.B) c.io.queue.deq.valid.expect(false.B)
} }
} }
it should "work when enqueing and dequeueing simultaneously to a full queue" in { it should "work when enqueing and dequeueing simultaneously to a full queue" in {
test(new CoalShiftQueue(UInt(8.W), 1)) { c => test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
// prepare // prepare
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
// enqueue and dequeue simultaneously // enqueue and dequeue simultaneously
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.bits.poke(0x34.U)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U) c.io.queue.deq.bits.expect(0x12.U)
c.clock.step() c.clock.step()
// enqueue and dequeue simultaneously once more // enqueue and dequeue simultaneously once more
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U) c.io.queue.enq.bits.poke(0x56.U)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U) c.io.queue.deq.bits.expect(0x34.U)
c.clock.step() c.clock.step()
// dequeueing back-to-back should work without any holes in the middle // dequeueing back-to-back should work without any holes in the middle
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U) c.io.queue.deq.bits.expect(0x56.U)
c.clock.step() c.clock.step()
// make sure is empty // make sure is empty
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
c.io.deq.valid.expect(false.B) c.io.queue.deq.valid.expect(false.B)
} }
} }
it should "invalidate head being dequeued" in { it should "invalidate head being dequeued" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c => test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
// prepare // prepare
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.bits.poke(0x34.U)
c.clock.step() c.clock.step()
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
// invalidate should work for the head just being dequeued at the same // invalidate should work for the head just being dequeued at the same
// cycle. However, it should not change deq.valid right away to avoid // cycle. However, it should not change deq.valid right away to avoid
// combinational cycles (see definition). // combinational cycles (see definition).
c.io.invalidate.poke(0x1.U) c.io.invalidate.valid.poke(true.B)
c.io.deq.ready.poke(true.B) c.io.invalidate.bits.poke(0x1.U)
c.io.deq.valid.expect(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.clock.step() c.clock.step()
// 0x12 should have been dequeued // 0x12 should have been dequeued
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U) c.io.queue.deq.bits.expect(0x34.U)
} }
} }
it should "dequeue invalidated entries by itself" in { it should "dequeue invalidated entries by itself" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c => test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
// prepare // prepare
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.bits.poke(0x34.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U) c.io.queue.enq.bits.poke(0x56.U)
c.clock.step() c.clock.step()
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
// invalidate two entries at head // invalidate two entries at head
c.io.invalidate.poke(0x3.U) c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x3.U)
c.clock.step() c.clock.step()
// 0x12 should have been dequeued now // 0x12 should have been dequeued now
c.io.invalidate.poke(0x0.U) c.io.invalidate.valid.poke(false.B)
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.clock.step() c.clock.step()
// 0x34 should have been dequeued now // 0x34 should have been dequeued now
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U) c.io.queue.deq.bits.expect(0x56.U)
c.clock.step() c.clock.step()
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(false.B) c.io.queue.deq.valid.expect(false.B)
} }
} }
it should "overwrite invalidated tail when enqueuing" in { it should "overwrite invalidated tail when enqueuing" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c => test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U) c.io.invalidate.valid.poke(false.B)
c.io.invalidate.bits.poke(0.U)
// prepare // prepare
c.io.deq.ready.poke(false.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U) c.io.queue.enq.bits.poke(0x12.U)
c.clock.step() c.clock.step()
// invalidate and enqueue at the tail at the same time // invalidate and enqueue at the tail at the same time
c.io.invalidate.poke(0x1.U) c.io.invalidate.valid.poke(true.B)
c.io.deq.ready.poke(false.B) c.io.invalidate.bits.poke(0x1.U)
c.io.enq.ready.expect(true.B) c.io.queue.deq.ready.poke(false.B)
c.io.enq.valid.poke(true.B) c.io.queue.enq.ready.expect(true.B)
c.io.enq.bits.poke(0x34.U) c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.clock.step() c.clock.step()
c.io.invalidate.poke(0x0.U) c.io.invalidate.valid.poke(false.B)
c.io.enq.valid.poke(false.B) c.io.queue.enq.valid.poke(false.B)
// now should be able to dequeue immediately as tail is overwritten // now should be able to dequeue immediately as tail is overwritten
c.io.deq.ready.poke(true.B) c.io.queue.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B) c.io.queue.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34) c.io.queue.deq.bits.expect(0x34)
} }
} }
} }
@@ -260,15 +264,11 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
val coalDataWidth = 128 val coalDataWidth = 128
val numInflightCoalRequests = 4 val numInflightCoalRequests = 4
val hey = new TLBundleParameters(64, 64, 64, 64, 64)
it should "work" in { it should "work" in {
test( test(
new UncoalescingUnit( new UncoalescingUnit(
numLanes, defaultConfig,
numPerLaneReqs,
sourceWidth,
sizeWidth,
coalDataWidth,
numInflightCoalRequests,
) )
) )
// vcs helps with simulation time, but sometimes errors with // vcs helps with simulation time, but sometimes errors with