active byte lane implementation for multi coalescer & add one shift queue test

This commit is contained in:
Richard Yan
2023-05-02 00:07:45 -07:00
parent 6757ea1bbd
commit 997b421c42
2 changed files with 84 additions and 61 deletions

View File

@@ -52,6 +52,7 @@ case class CoalescerConfig(
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
// each size is log(byteSize)
sizeEnum: InFlightTableSizeEnum,
arbiterOutputs: Int
) {
// maximum coalesced size
def maxCoalLogSize: Int = coalLogSizes.max
@@ -70,13 +71,25 @@ object defaultConfig extends CoalescerConfig(
numNewSrcIds = 4,
respQueueDepth = 4,
coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum
sizeEnum = DefaultInFlightTableSizeEnum,
arbiterOutputs = 4
)
class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends LazyModule {
// Identity node that captures the incoming TL requests and passes them
// through the other end, dropping coalesced requests. This node is what
// will be visible to upstream and downstream nodes.
// Nexus node that captures the incoming TL requests, rewrites coalescable requests,
// and arbitrates between non-coalesced and coalesced requests to a fixed number of outputs
// before sending it out to memory. This node is what's visible to upstream and downstream nodes.
// WIP:
// val node = TLNexusNode(
// clientFn = c => c.head,
// managerFn = m => m.head // assuming arbiter generated ids are distinct between edges
// )
// node.in.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.wordSizeInBytes,
// s"input edges into coalescer node do not have beatBytes = ${config.wordSizeInBytes}"))
// node.out.map(_._2).foreach(edge => require(edge.manager.beatBytes == (1 << config.maxCoalLogSize),
// s"output edges out of coalescer node do not have beatBytes = ${1 << config.maxCoalLogSize}"))
val node = TLIdentityNode()
// Number of maximum in-flight coalesced requests. The upper bound of this
@@ -452,7 +465,9 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
for (i <- 0 until maxWords) {
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U)
// note: the AND against addrMask is commented out below to conform to the active byte lanes
// requirement; if aligning to the LSB suffices, we should add the bitwise AND back
m && ((req.address(config.maxCoalLogSize - 1, 0)/* & addrMask*/) === i.U)
}
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>

View File

@@ -114,7 +114,8 @@ object testConfig extends CoalescerConfig(
numNewSrcIds = 4,
respQueueDepth = 4,
coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum
sizeEnum = DefaultInFlightTableSizeEnum,
arbiterOutputs = 4
)
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
@@ -216,29 +217,28 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "request shift queues"
/** Presents one enqueue beat on the queue's enqueue ports, then advances the clock one cycle.
  *
  * Ports are paired positionally with `bits` and `valids`; pairing stops at the shortest
  * of the three sequences. For each paired port, `ready` is checked to be high before
  * `valid` and `bits` are driven. This helper does not deassert `valid` afterwards.
  *
  * @param c      queue under test
  * @param bits   payload to drive on each enqueue port
  * @param valids valid flag to drive on each enqueue port
  */
def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
  val stimulus = bits.zip(valids)
  c.io.queue.enq.zip(stimulus).foreach { case (port, (payload, isValid)) =>
    // the port must be accepting before anything is driven onto it
    port.ready.expect(true.B)
    port.valid.poke(isValid)
    port.bits.poke(payload)
  }
  c.clock.step()
}
/** Checks the current-cycle outputs of the queue's dequeue ports without advancing the clock.
  *
  * Ports are paired positionally with `bits` and `valids`; pairing stops at the shortest
  * of the three sequences. For each paired port, `valid` is checked first, then `bits`.
  *
  * @param c      queue under test
  * @param bits   payload expected on each dequeue port
  * @param valids valid flag expected on each dequeue port
  */
def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
  val expected = bits.zip(valids)
  c.io.queue.deq.zip(expected).foreach { case (port, (payload, isValid)) =>
    port.valid.expect(isValid)
    port.bits.expect(payload)
  }
}
/** Pokes each signal in `vec` with the value at the same position in `value`.
  *
  * Pairing is positional and stops at the shorter of the two sequences.
  */
def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
  vec.zip(value).foreach { pair =>
    val (signal, driven) = pair
    signal.poke(driven)
  }
}
it should "work like normal shiftqueue when no invalidate" in {
// new CoalShiftQueue(0.U,4, testConfig)
/** Presents one enqueue beat on the queue's enqueue ports, then advances the clock one cycle.
  *
  * Ports are paired positionally with `bits` and `valids`; pairing stops at the shortest
  * of the three sequences. For each paired port, `ready` is checked to be high before
  * `valid` and `bits` are driven. This helper does not deassert `valid` afterwards.
  */
def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
  val stimulus = bits.zip(valids)
  c.io.queue.enq.zip(stimulus).foreach { case (port, (payload, isValid)) =>
    // the port must be accepting before anything is driven onto it
    port.ready.expect(true.B)
    port.valid.poke(isValid)
    port.bits.poke(payload)
  }
  c.clock.step()
}
/** Checks the current-cycle outputs of the queue's dequeue ports without advancing the clock.
  *
  * Ports are paired positionally with `bits` and `valids`; pairing stops at the shortest
  * of the three sequences. For each paired port, `valid` is checked first, then `bits`.
  */
def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
  val expected = bits.zip(valids)
  c.io.queue.deq.zip(expected).foreach { case (port, (payload, isValid)) =>
    port.valid.expect(isValid)
    port.bits.expect(payload)
  }
}
/** Pokes each signal in `vec` with the value at the same position in `value`.
  *
  * Pairing is positional and stops at the shorter of the two sequences.
  */
def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
  vec.zip(value).foreach { pair =>
    val (signal, driven) = pair
    signal.poke(driven)
  }
}
test(new CoalShiftQueue(UInt(8.W),4, testConfig)) { c =>
c.io.coalescable.foreach(_.poke(true.B))
c.io.queue.deq.foreach(_.ready.poke(false.B))
@@ -322,42 +322,49 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
c.clock.step()
// attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B))
attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B))
}
}
// Drives new entries into the queue in the same cycles in which the consumer side is ready,
// and checks the value/valid seen on the dequeue ports after every clock step.
// Each attemptEnqueue call steps the clock once, so the expectDequeue that follows it
// observes the outputs of the next cycle.
it should "work when enqueing and dequeueing simultaneously" in {
test(new CoalShiftQueue(UInt(8.W), 4, testConfig)) { c =>
// setup: no invalidation, all entries flagged coalescable, consumer not ready
c.io.invalidate.valid.poke(false.B)
c.io.coalescable.foreach(_.poke(true.B))
c.io.queue.deq.foreach(_.ready.poke(false.B))
// first beat: value 1 with valid high on every paired enqueue port
attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B))
// mark for dequeue
c.io.coalescable.foreach(_.poke(false.B))
c.io.queue.deq.foreach(_.ready.poke(true.B))
expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B))
// keep enqueuing 2..5 while the consumer stays ready.
// NOTE(review): deq.valid is expected to alternate true/false on successive cycles, i.e.
// each value appears to take two cycles to leave the head — confirm against CoalShiftQueue.
attemptEnqueue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(false.B))
attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B))
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
attemptEnqueue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B))
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(false.B))
attemptEnqueue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B))
expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B))
// from here on only the clock is stepped; no new values are poked.
// NOTE(review): attemptEnqueue never deasserts enq.valid, so enq.valid/bits presumably stay at
// (true, 5) during these steps — confirm that continuing to offer 5.U is intended.
c.clock.step()
expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(false.B))
c.clock.step()
expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B))
c.clock.step()
expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(false.B))
c.clock.step()
expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B))
c.clock.step()
expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(false.B))
c.clock.step()
}
}
/*
it should "work when enqueing and dequeueing simultaneously" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x12.U)
c.clock.step()
// enqueue and dequeue simultaneously
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x34.U)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x12.U)
c.clock.step()
// dequeueing back-to-back should work without any holes in the middle
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x34.U)
c.clock.step()
// make sure is empty
c.io.queue.deq.ready.poke(true.B)
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.valid.expect(false.B)
}
}
it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in {
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
c.io.invalidate.valid.poke(false.B)
@@ -566,7 +573,8 @@ object uncoalescerTestConfig extends CoalescerConfig(
numNewSrcIds = 4,
respQueueDepth = 4,
coalLogSizes = Seq(4),
sizeEnum = DefaultInFlightTableSizeEnum
sizeEnum = DefaultInFlightTableSizeEnum,
arbiterOutputs = 4
)
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {