Create custom response bundle to decouple from TileLink

... and easier unit testing.
This commit is contained in:
Hansung Kim
2023-04-23 13:37:10 -07:00
parent 4d16ec9b08
commit ccf9b95fb5
2 changed files with 154 additions and 139 deletions

View File

@@ -327,7 +327,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
tlCoal.a.bits := coalescer.io.out_req.bits.toTLA(edgeCoal)
tlCoal.b.ready := true.B
tlCoal.c.valid := false.B
tlCoal.d.ready := true.B
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
tlCoal.e.valid := false.B
@@ -475,13 +475,15 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(newEntry)
// Uncoalescer module uncoalesces responses back to each lane
val uncoalescer = Module(new UncoalescingUnit(config, tlCoal.d.bits.cloneType))
val uncoalescer = Module(new UncoalescingUnit(config))
uncoalescer.io.coalReqValid := coalescer.io.out_req.valid
uncoalescer.io.newEntry := newEntry
// richard: I changed this to use the DecoupledIO interface, which TL is using,
// previously we were not handling the ready signal
uncoalescer.io.coalResp <> tlCoal.d
// Cleanup: custom <>?
uncoalescer.io.coalResp.valid := tlCoal.d.valid
uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source
uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data
tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Queue up synthesized uncoalesced responses into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
@@ -507,7 +509,19 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(tlCoal.d)
}
class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Module {
// Protocol-agnostic bundle that represents a coalesced response.
//
// Having this makes it easier to:
// * do unit tests -- no need to deal with TileLink in the chiseltest code
// * adapt coalescer to custom protocols like a custom L1 cache interface.
//
// FIXME: overlaps with RespQueueEntry. Trait-ify
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
val source = UInt(log2Ceil(config.NUM_NEW_IDS).W)
val data = UInt((8 * (1 << config.MAX_SIZE)).W)
}
class UncoalescingUnit(config: CoalescerConfig) extends Module {
// notes to hansung:
// val numLanes: Int, <-> config.NUM_LANES
// val numPerLaneReqs: Int, <-> config.DEPTH
@@ -518,8 +532,9 @@ class UncoalescingUnit(config: CoalescerConfig, tlCoalD: TLBundleD) extends Modu
val inflightTable = Module(new InflightCoalReqTable(config))
val io = IO(new Bundle {
val coalReqValid = Input(Bool())
// FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer
val newEntry = Input(inflightTable.entryT.cloneType)
val coalResp = Flipped(Decoupled(tlCoalD))
val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config)))
val uncoalResps = Output(
Vec(
config.NUM_LANES,
@@ -1239,7 +1254,7 @@ class TLRAMCoalescerLoggerTest(timeout: Int = 500000)(implicit p: Parameters)
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
// TODO: use parameters for numLanes
val numLanes = 4
val filename = "test.trace"
val filename = "vecadd.core1.thread4.trace"
val coal = LazyModule(new CoalescingUnit(defaultConfig))
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge

View File

@@ -36,216 +36,220 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
it should "work like normal shiftqueue when no invalidate" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.deq.ready.poke(false.B)
c.io.queue.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.clock.step()
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x56.U)
c.clock.step()
c.io.enq.valid.poke(false.B)
c.io.queue.enq.valid.poke(false.B)
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x12.U)
c.clock.step()
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34.U)
c.clock.step()
// enqueue in the middle
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x78.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x78.U)
c.clock.step()
c.io.enq.valid.poke(false.B)
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x56.U)
c.clock.step()
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x78.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x78.U)
c.clock.step()
// should be emptied
c.io.deq.valid.expect(false.B)
c.io.queue.deq.valid.expect(false.B)
}
}
it should "work when enqueing and dequeueing simultaneously" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U)
c.io.invalidate.valid.poke(false.B)
// prepare
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x12.U)
c.clock.step()
// dequeueing back-to-back should work without any holes in the middle
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34.U)
c.clock.step()
// make sure is empty
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(false.B)
}
}
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
c.io.invalidate.poke(0.U)
c.io.invalidate.valid.poke(false.B)
// prepare
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x12.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously once more
c.io.deq.ready.poke(true.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x56.U)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34.U)
c.clock.step()
// dequeueing back-to-back should work without any holes in the middle
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x56.U)
c.clock.step()
// make sure is empty
c.io.deq.ready.poke(true.B)
c.io.enq.valid.poke(false.B)
c.io.deq.valid.expect(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(false.B)
}
}
it should "invalidate head being dequeued" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U)
c.io.invalidate.valid.poke(false.B)
// prepare
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.clock.step()
c.io.enq.valid.poke(false.B)
c.io.queue.enq.valid.poke(false.B)
// invalidate should work for the head just being dequeued at the same
// cycle. However, it should not change deq.valid right away to avoid
// combinational cycles (see definition).
c.io.invalidate.poke(0x1.U)
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x1.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.clock.step()
// 0x12 should have been dequeued
c.io.invalidate.poke(0.U)
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34.U)
c.io.invalidate.valid.poke(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34.U)
}
}
it should "dequeue invalidated entries by itself" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U)
c.io.invalidate.valid.poke(false.B)
// prepare
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.clock.step()
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x56.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x56.U)
c.clock.step()
c.io.enq.valid.poke(false.B)
c.io.queue.enq.valid.poke(false.B)
// invalidate two entries at head
c.io.invalidate.poke(0x3.U)
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x3.U)
c.clock.step()
// 0x12 should have been dequeued now
c.io.invalidate.poke(0x0.U)
c.io.deq.ready.poke(false.B)
c.io.invalidate.valid.poke(false.B)
c.io.queue.deq.ready.poke(false.B)
c.clock.step()
// 0x34 should have been dequeued now
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x56.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x56.U)
c.clock.step()
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(false.B)
}
}
it should "overwrite invalidated tail when enqueuing" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.poke(0.U)
c.io.invalidate.valid.poke(false.B)
c.io.invalidate.bits.poke(0.U)
// prepare
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x12.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
// invalidate and enqueue at the tail at the same time
c.io.invalidate.poke(0x1.U)
c.io.deq.ready.poke(false.B)
c.io.enq.ready.expect(true.B)
c.io.enq.valid.poke(true.B)
c.io.enq.bits.poke(0x34.U)
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x1.U)
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.clock.step()
c.io.invalidate.poke(0x0.U)
c.io.enq.valid.poke(false.B)
c.io.invalidate.valid.poke(false.B)
c.io.queue.enq.valid.poke(false.B)
// now should be able to dequeue immediately as tail is overwritten
c.io.deq.ready.poke(true.B)
c.io.deq.valid.expect(true.B)
c.io.deq.bits.expect(0x34)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34)
}
}
}
@@ -260,15 +264,11 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
val coalDataWidth = 128
val numInflightCoalRequests = 4
val hey = new TLBundleParameters(64, 64, 64, 64, 64)
it should "work" in {
test(
new UncoalescingUnit(
numLanes,
numPerLaneReqs,
sourceWidth,
sizeWidth,
coalDataWidth,
numInflightCoalRequests,
defaultConfig,
)
)
// vcs helps with simulation time, but sometimes errors with