active byte lane implementation for multi coalescer & add one shift queue test
This commit is contained in:
@@ -52,6 +52,7 @@ case class CoalescerConfig(
|
||||
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||
// each size is log(byteSize)
|
||||
sizeEnum: InFlightTableSizeEnum,
|
||||
arbiterOutputs: Int
|
||||
) {
|
||||
// maximum coalesced size
|
||||
def maxCoalLogSize: Int = coalLogSizes.max
|
||||
@@ -70,13 +71,25 @@ object defaultConfig extends CoalescerConfig(
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalLogSizes = Seq(3),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
sizeEnum = DefaultInFlightTableSizeEnum,
|
||||
arbiterOutputs = 4
|
||||
)
|
||||
|
||||
class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends LazyModule {
|
||||
// Identity node that captures the incoming TL requests and passes them
|
||||
// through the other end, dropping coalesced requests. This node is what
|
||||
// will be visible to upstream and downstream nodes.
|
||||
// Nexus node that captures the incoming TL requests, rewrites coalescable requests,
|
||||
// and arbitrates between non-coalesced and coalesced requests to a fixed number of outputs
|
||||
// before sending it out to memory. This node is what's visible to upstream and downstream nodes.
|
||||
|
||||
// WIP:
|
||||
// val node = TLNexusNode(
|
||||
// clientFn = c => c.head,
|
||||
// managerFn = m => m.head // assuming arbiter generated ids are distinct between edges
|
||||
// )
|
||||
// node.in.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.wordSizeInBytes,
|
||||
// s"input edges into coalescer node does not have beatBytes = ${config.wordSizeInBytes}"))
|
||||
// node.out.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.maxCoalLogSize,
|
||||
// s"output edges into coalescer node does not have beatBytes = ${config.maxCoalLogSize}"))
|
||||
|
||||
val node = TLIdentityNode()
|
||||
|
||||
// Number of maximum in-flight coalesced requests. The upper bound of this
|
||||
@@ -452,7 +465,9 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
|
||||
for (i <- 0 until maxWords) {
|
||||
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
|
||||
m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U)
|
||||
// note: ANDing against addrMask is to conform to active byte lanes requirements
|
||||
// if aligning to LSB suffices, we should add the bitwise AND back
|
||||
m && ((req.address(config.maxCoalLogSize - 1, 0)/* & addrMask*/) === i.U)
|
||||
}
|
||||
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
|
||||
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
|
||||
|
||||
@@ -114,7 +114,8 @@ object testConfig extends CoalescerConfig(
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalLogSizes = Seq(3),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
sizeEnum = DefaultInFlightTableSizeEnum,
|
||||
arbiterOutputs = 4
|
||||
)
|
||||
|
||||
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
@@ -216,29 +217,28 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "request shift queues"
|
||||
|
||||
/** Drives one enqueue attempt on the queue's enqueue ports, then advances the clock by one step.
  *
  * The enqueue ports of `c` are paired positionally with `bits` and `valids`
  * (zip stops at the shortest of the three sequences). For every paired port
  * the helper first expects `ready` to be high, then pokes `valid` and `bits`.
  *
  * @param c      the CoalShiftQueue under test
  * @param bits   value poked onto each enqueue port's `bits`
  * @param valids value poked onto each enqueue port's `valid`
  */
def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
|
||||
((c.io.queue.enq zip bits) zip valids).foreach { case ((enq, ent), valid) =>
|
||||
// The expectation fails unless the port reports ready before it is driven.
enq.ready.expect(true.B)
|
||||
enq.valid.poke(valid)
|
||||
enq.bits.poke(ent)
|
||||
}
|
||||
// Advance one clock step after all ports have been poked.
c.clock.step()
|
||||
}
|
||||
|
||||
/** Checks the current outputs of the queue's dequeue ports against expected values.
  *
  * The dequeue ports of `c` are paired positionally with `bits` and `valids`
  * (zip stops at the shortest of the three sequences). This helper only
  * observes outputs: it does not poke any signal and does not step the clock.
  *
  * @param c      the CoalShiftQueue under test
  * @param bits   expected value of each dequeue port's `bits`
  * @param valids expected value of each dequeue port's `valid`
  */
def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
|
||||
((c.io.queue.deq zip bits) zip valids).foreach { case ((deq, ent), valid) =>
|
||||
deq.valid.expect(valid)
|
||||
// Note: `bits` is checked unconditionally, even when `valid` is expected to be false.
deq.bits.expect(ent)
|
||||
}
|
||||
}
|
||||
|
||||
/** Pokes each element of `vec` with the element of `value` at the same position.
  *
  * Elements are paired with zip, so any extra elements in the longer sequence are ignored.
  *
  * @param vec   signals to drive
  * @param value values to poke, in the same order as `vec`
  */
def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
|
||||
(vec zip value).foreach { case (a, b) => a.poke(b) }
|
||||
}
|
||||
|
||||
it should "work like normal shiftqueue when no invalidate" in {
|
||||
|
||||
// new CoalShiftQueue(0.U,4, testConfig)
|
||||
def attemptEnqueue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
|
||||
((c.io.queue.enq zip bits) zip valids).foreach { case ((enq, ent), valid) =>
|
||||
enq.ready.expect(true.B)
|
||||
enq.valid.poke(valid)
|
||||
enq.bits.poke(ent)
|
||||
}
|
||||
c.clock.step()
|
||||
}
|
||||
|
||||
def expectDequeue(c: CoalShiftQueue[UInt], bits: Seq[UInt], valids: Seq[Bool]): Unit = {
|
||||
((c.io.queue.deq zip bits) zip valids).foreach { case ((deq, ent), valid) =>
|
||||
deq.valid.expect(valid)
|
||||
deq.bits.expect(ent)
|
||||
}
|
||||
}
|
||||
|
||||
def pokeVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
|
||||
(vec zip value).foreach { case (a, b) => a.poke(b) }
|
||||
}
|
||||
|
||||
test(new CoalShiftQueue(UInt(8.W),4, testConfig)) { c =>
|
||||
c.io.coalescable.foreach(_.poke(true.B))
|
||||
c.io.queue.deq.foreach(_.ready.poke(false.B))
|
||||
@@ -322,42 +322,49 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
|
||||
c.clock.step()
|
||||
|
||||
// attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B))
|
||||
attemptEnqueue(c, Seq.fill(4)(6.U), Seq.fill(4)(true.B))
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4, testConfig)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
c.io.coalescable.foreach(_.poke(true.B))
|
||||
c.io.queue.deq.foreach(_.ready.poke(false.B))
|
||||
|
||||
attemptEnqueue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B))
|
||||
|
||||
// mark for dequeue
|
||||
c.io.coalescable.foreach(_.poke(false.B))
|
||||
c.io.queue.deq.foreach(_.ready.poke(true.B))
|
||||
expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(true.B))
|
||||
attemptEnqueue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
|
||||
|
||||
expectDequeue(c, Seq.fill(4)(1.U), Seq.fill(4)(false.B))
|
||||
attemptEnqueue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B))
|
||||
|
||||
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(true.B))
|
||||
attemptEnqueue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B))
|
||||
|
||||
expectDequeue(c, Seq.fill(4)(2.U), Seq.fill(4)(false.B))
|
||||
attemptEnqueue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B))
|
||||
|
||||
expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(true.B))
|
||||
c.clock.step()
|
||||
expectDequeue(c, Seq.fill(4)(3.U), Seq.fill(4)(false.B))
|
||||
c.clock.step()
|
||||
expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(true.B))
|
||||
c.clock.step()
|
||||
expectDequeue(c, Seq.fill(4)(4.U), Seq.fill(4)(false.B))
|
||||
c.clock.step()
|
||||
expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(true.B))
|
||||
c.clock.step()
|
||||
expectDequeue(c, Seq.fill(4)(5.U), Seq.fill(4)(false.B))
|
||||
c.clock.step()
|
||||
}
|
||||
}
|
||||
/*
|
||||
it should "work when enqueing and dequeueing simultaneously" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
// enqueue and dequeue simultaneously
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x12.U)
|
||||
c.clock.step()
|
||||
// dequeueing back-to-back should work without any holes in the middle
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
c.clock.step()
|
||||
// make sure is empty
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
@@ -566,7 +573,8 @@ object uncoalescerTestConfig extends CoalescerConfig(
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalLogSizes = Seq(4),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
sizeEnum = DefaultInFlightTableSizeEnum,
|
||||
arbiterOutputs = 4
|
||||
)
|
||||
|
||||
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
Reference in New Issue
Block a user