tl graph changes, coalescer bug fixes & coalescer unit test

This commit is contained in:
Richard Yan
2023-05-03 17:58:25 -07:00
parent 736a8d4e98
commit ebd6c54d67
2 changed files with 101 additions and 95 deletions

View File

@@ -90,7 +90,8 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
// node.out.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.maxCoalLogSize,
// s"output edges into coalescer node does not have beatBytes = ${config.maxCoalLogSize}"))
val node = TLIdentityNode()
val aggregateNode = TLIdentityNode()
val cpuNode = TLIdentityNode()
// Number of maximum in-flight coalesced requests. The upper bound of this
// value would be the sourceId range of a single lane.
@@ -107,8 +108,9 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
Seq(TLMasterPortParameters.v1(coalParam))
)
// Connect master node as the first inward edge of the IdentityNode
node :=* coalescerNode
// merge coalescerNode and cpuNode
aggregateNode :=* coalescerNode
aggregateNode :=* TLWidthWidget(config.wordSizeInBytes) :=* cpuNode
lazy val module = new CoalescingUnitImp(this, config)
}
@@ -210,7 +212,10 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
val empty = Bool()
}))
val shiftHint = !io.coalescable.reduce(_ || _)
// shift hint is when the heads have no more coalescable left this or next cycle
val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, i) =>
c && !(io.invalidate.valid && i)
}.reduce(_ || _)
val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _)
val syncedDeqValid = io.queue.deq.map(_.valid).reduce(_ || _)
@@ -488,18 +493,15 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
// note: ANDing against addrMask is to conform to active byte lanes requirements
// if aligning to LSB suffices, we should add the bitwise AND back
m && ((req.address(config.maxCoalLogSize - 1, 0)/* & addrMask*/) === i.U)
m && ((req.address(config.maxCoalLogSize - 1, config.wordWidth)/* & addrMask*/) === i.U)
}
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
s -> req.data
})
mask(i) := Mux(i.U < numWords,
MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) =>
s -> req.mask
}),
0.U
)
mask(i) := MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) =>
s -> req.mask
})
}
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
@@ -528,15 +530,14 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
}
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
// Make sure IdentityNode is connected to an upstream node, not just the
// coalescer TL master node
assert(outer.node.in.length >= 2)
assert(outer.node.in(1)._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
s"old source id bits TL param (${outer.node.in(1)._1.params.sourceBits}) mismatch with config")
assert(outer.node.in(1)._1.params.addressBits == config.addressWidth,
s"address width TL param (${outer.node.in(1)._1.params.addressBits}) mismatch with config")
assert(outer.cpuNode.in.length == config.numLanes,
s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as number of lanes")
assert(outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
s"old source id bits TL param (${outer.cpuNode.in.head._1.params.sourceBits}) mismatch with config")
assert(outer.cpuNode.in.head._1.params.addressBits == config.addressWidth,
s"address width TL param (${outer.cpuNode.in.head._1.params.addressBits}) mismatch with config")
val sourceWidth = outer.node.in(1)._1.params.sourceBits
val sourceWidth = outer.cpuNode.in.head._1.params.sourceBits
// note we are using word size. assuming all coalescer inputs are word sized
val reqQueueEntryT = new ReqQueueEntry(sourceWidth, config.wordWidth, config.addressWidth, config.wordSizeInBytes)
val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config))
@@ -553,19 +554,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// Override IdentityNode implementation so that we can instantiate
// queues between input and output edges to buffer requests and responses.
// See IdentityNode definition in `diplomacy/Nodes.scala`.
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
assert(
edgeIn.master.masters(0).name == "CoalescerNode",
"First edge is not connected to the coalescer master node"
)
// Edge from the coalescer TL master node should simply bypass the identity node,
// except for connecting the outgoing edge to the inflight table, which is done
// down below.
tlOut.a <> tlIn.a
case (((tlIn, _), (tlOut, edgeOut)), i) =>
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
case (((tlIn, _), (tlOut, edgeOut)), lane) =>
// Request queue
val lane = i - 1
val req = Wire(reqQueueEntryT)
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
@@ -592,7 +583,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
tlOut.a.bits := deq.bits.toTLA(edgeOut)
}
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
val (tlCoal, edgeCoal) = outer.coalescerNode.out.head
tlCoal.a.valid := coalescer.io.coalReq.valid
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
@@ -643,22 +634,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
val respQueueNoncoalPort = 0
val respQueueUncoalPortOffset = 1
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
assert(
edgeIn.master.masters(0).name == "CoalescerNode",
"First edge is not connected to the coalescer master node"
)
// Edge from the coalescer TL master node should simply bypass the identity node,
// except for connecting the outgoing edge to the inflight table, which is done
// down below.
tlIn.d <> tlOut.d
case (((tlIn, edgeIn), (tlOut, _)), i) =>
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
case (((tlIn, edgeIn), (tlOut, _)), lane) =>
// Response queue
//
// This queue will serialize non-coalesced responses along with
// coalesced responses and serve them back to the core side.
val lane = i - 1
val respQueue = respQueues(lane)
val resp = Wire(respQueueEntryT)
resp.fromTLD(tlOut.d.bits)
@@ -1564,8 +1545,8 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule {
val coal = LazyModule(new CoalescingUnit(defaultConfig))
coal.node :=* driver.node
rams.foreach(_.node := coal.node)
coal.cpuNode :=* driver.node
rams.foreach(_.node := coal.aggregateNode)
lazy val module = new Impl
class Impl extends LazyModuleImp(this) with UnitTestModule {
@@ -1604,7 +1585,8 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
)
)
memSideLogger.node :=* coal.node :=* coreSideLogger.node :=* driver.node
memSideLogger.node :=* coal.aggregateNode
coal.cpuNode :=* coreSideLogger.node :=* driver.node
rams.foreach { r => r.node := memSideLogger.node }
lazy val module = new Impl
@@ -1653,8 +1635,8 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
)
)
coal.node :=* driver.node
rams.foreach { r => r.node := coal.node }
coal.cpuNode :=* driver.node
rams.foreach { r => r.node := coal.aggregateNode }
lazy val module = new Impl
class Impl extends LazyModuleImp(this) with UnitTestModule {

View File

@@ -7,6 +7,7 @@ import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.MultiPortQueue
import freechips.rocketchip.diplomacy._
import chipsalliance.rocketchip.config.Parameters
import chisel3.util.{DecoupledIO, Valid}
import chisel3.util.experimental.BoringUtils
class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
@@ -80,11 +81,8 @@ class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
val dut = LazyModule(new CoalescingUnit(testConfig))
val widthWidgets = Seq.tabulate(4) { _ => TLWidthWidget(4)}
(cpuNodes zip widthWidgets).foreach { case (cpuNode, widthWidget) => widthWidget := cpuNode}
widthWidgets.foreach(dut.node := _)
l2Nodes.foreach(_ := dut.node)
cpuNodes.foreach(dut.cpuNode := _)
l2Nodes.foreach(_ := dut.aggregateNode)
lazy val module = new DummyCoalescingUnitTBImp(this)
}
@@ -99,6 +97,8 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
val coalIO3 = outer.cpuNodes(3).makeIOs()
val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3)
// val coalMasterNode = coal.coalescerNode.makeIOs()
private val reqQueues = coal.module.reqQueues
private val coalescer = coal.module.coalescer
@@ -107,23 +107,30 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
private val peekIn = Seq(
reqQueues.io.queue.enq.map(_.ready),
reqQueues.io.queue.enq.map(_.bits),
reqQueues.io.queue.enq.map(_.valid),
reqQueues.io.queue.deq.map(_.bits),
reqQueues.io.queue.deq.map(_.valid),
coalescer.io.coalReq.ready,
coalescer.io.coalReq.bits,
coalescer.io.coalReq.valid,
coalescer.io.invalidate,
)
val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
val reqQueueDeqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType)))
val reqQueueDeqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
val coalReqReady = IO(Output(peekIn(3).asInstanceOf[Bool].cloneType))
val coalReqBits = IO(Output(peekIn(4).asInstanceOf[ReqQueueEntry].cloneType))
val coalReqValid = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType))
val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(x.cloneType))
val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType)))
val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType))
val coalReqBits = IO(Output(peekIn(6).asInstanceOf[ReqQueueEntry].cloneType))
val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType))
val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType))
private val peekOut = Seq(
reqQueueEnqReady, reqQueueDeqBits, reqQueueDeqValid,
coalReqReady, coalReqBits, coalReqValid,
reqQueueEnqReady, reqQueueEnqBits, reqQueueEnqValid,
reqQueueDeqBits, reqQueueDeqValid,
coalReqReady, coalReqBits, coalReqValid, coalInvalidate,
)
(peekIn zip peekOut).foreach {
@@ -142,14 +149,13 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
// coalescer.io.coalReq.ready
)
val reqQueueDeqReady = pokeIn(0).asInstanceOf[Seq[Bool]].map(x => IO(Input(x.cloneType)))
val reqQueueDeqReady = pokeIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
private val pokeOut = Seq(
reqQueueDeqReady
)
// TODO: doesn't work yet
/*
(pokeIn zip pokeOut).foreach {
case (inner: IndexedSeq[Data], outer: Seq[Data]) =>
(inner zip outer).foreach { case (i, o) =>
@@ -159,9 +165,7 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
BoringUtils.bore(inner, Seq(outer))
case _ =>
assert(false, "boring between different data types")
}*/
// val coalMasterNode = coal.coalescerNode.makeIOs()
}
}
object testConfig extends CoalescerConfig(
@@ -176,7 +180,7 @@ object testConfig extends CoalescerConfig(
numOldSrcIds = 16,
numNewSrcIds = 4,
respQueueDepth = 4,
coalLogSizes = Seq(3),
coalLogSizes = Seq(4, 5),
sizeEnum = DefaultInFlightTableSizeEnum,
arbiterOutputs = 4
)
@@ -188,13 +192,8 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
def pokeA(
nodes: Seq[TLBundle],
idx: Int,
op: Int,
size: Int,
source: Int,
addr: Int,
mask: Int,
data: Int
idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int,
valid: Boolean = true,
): Unit = {
val node = nodes(idx)
// node.a.ready.expect(true.B) // FIXME: this fails currently
@@ -206,7 +205,7 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
node.a.bits.mask.poke(mask.U)
node.a.bits.data.poke(data.U)
node.a.bits.corrupt.poke(false.B)
node.a.valid.poke(true.B)
node.a.valid.poke(valid.B)
}
def unsetA(nodes: Seq[TLBundle]): Unit = {
@@ -215,31 +214,56 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
it should "coalesce fully consecutive accesses at size 4, only once" in {
test(LazyModule(new DummyCoalescingUnitTB()).module)
.withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
{ c =>
println(s"coalIO length = ${c.coalIOs(0).length}")
val nodes = c.coalIOs.map(_.head)
def expectVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
(vec zip value).foreach { case (a, b) => a.expect(b) }
}
c.reqQueueEnqReady.foreach(_.expect(true.B))
it should "coalesce fully consecutive accesses at size 4, only once" in {
test(LazyModule(new DummyCoalescingUnitTB()).module)
.withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
{ c =>
println(s"coalIO length = ${c.coalIOs(0).length}")
val nodes = c.coalIOs.map(_.head)
// TODO: this doesn't work
// c.coalMasterNode.head.a.ready.poke(true.B)
// always ready to take non-coalesced requests
c.reqQueueDeqReady.foreach(_.poke(true.B))
c.reqQueueEnqReady.foreach(_.expect(true.B))
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
expectVec(c.reqQueueEnqBits.map(_.data), Seq(0x1111.U, 0x2222.U, 0x3333.U, 0x4444.U))
c.clock.step()
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
unsetA(nodes)
c.reqQueueDeqValid.foreach(_.expect(false.B))
c.clock.step()
c.coalReqValid.expect(true.B)
c.coalReqBits.address.expect(0x10.U)
c.coalReqBits.data.expect(BigInt("4444000033330000222200001111", 16) << 128)
c.coalReqBits.mask.expect(0xffff0000L)
c.coalReqBits.size.expect(4.U)
c.coalReqBits.op.expect(1.U)
unsetA(nodes)
// c.coalReqReady.expect(true.B)
c.reqQueueEnqReady.foreach(_.expect(true.B))
pokeA(nodes, idx = 0, op = 1, size = 2, source = 1, addr = 0xf20, mask = 0xf, data = 0x5555)
pokeA(nodes, idx = 1, op = 1, size = 2, source = 1, addr = 0xf24, mask = 0xf, data = 0x6666, valid = false)
pokeA(nodes, idx = 2, op = 1, size = 2, source = 1, addr = 0xf28, mask = 0xf, data = 0x7777)
pokeA(nodes, idx = 3, op = 1, size = 2, source = 1, addr = 0xf2c, mask = 0xf, data = 0x8888, valid = false)
c.clock.step()
c.clock.step()
c.clock.step()
}
}
c.coalReqValid.expect(true.B)
c.coalReqBits.address.expect(0xf20.U)
c.coalReqBits.data.expect(BigInt("77770000000000005555", 16)) // technically these can be dontcare's
c.coalReqBits.mask.expect(0x0000ffff)
c.coalReqBits.size.expect(4.U)
c.coalReqBits.op.expect(1.U)
c.clock.step()
c.clock.step()
}
}
it should "coalesce identical addresses (stride of 0)" in {
test(LazyModule(new DummyCoalescingUnitTB()).module)