tl graph changes, coalescer bug fixes & coalescer unit test
This commit is contained in:
@@ -90,7 +90,8 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
|
||||
// node.out.map(_._2).foreach(edge => require(edge.manager.beatBytes == config.maxCoalLogSize,
|
||||
// s"output edges into coalescer node does not have beatBytes = ${config.maxCoalLogSize}"))
|
||||
|
||||
val node = TLIdentityNode()
|
||||
val aggregateNode = TLIdentityNode()
|
||||
val cpuNode = TLIdentityNode()
|
||||
|
||||
// Maximum number of in-flight coalesced requests. The upper bound of this
|
||||
// value would be the sourceId range of a single lane.
|
||||
@@ -107,8 +108,9 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
|
||||
Seq(TLMasterPortParameters.v1(coalParam))
|
||||
)
|
||||
|
||||
// Connect master node as the first inward edge of the IdentityNode
|
||||
node :=* coalescerNode
|
||||
// merge coalescerNode and cpuNode
|
||||
aggregateNode :=* coalescerNode
|
||||
aggregateNode :=* TLWidthWidget(config.wordSizeInBytes) :=* cpuNode
|
||||
|
||||
lazy val module = new CoalescingUnitImp(this, config)
|
||||
}
|
||||
@@ -210,7 +212,10 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
|
||||
val empty = Bool()
|
||||
}))
|
||||
|
||||
val shiftHint = !io.coalescable.reduce(_ || _)
|
||||
// shiftHint is asserted when no queue head remains coalescable: every head is either not coalescable this cycle or is being invalidated this cycle
|
||||
val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, i) =>
|
||||
c && !(io.invalidate.valid && i)
|
||||
}.reduce(_ || _)
|
||||
val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _)
|
||||
val syncedDeqValid = io.queue.deq.map(_.valid).reduce(_ || _)
|
||||
|
||||
@@ -488,18 +493,15 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
|
||||
// note: ANDing against addrMask is to conform to active byte lanes requirements
|
||||
// if aligning to LSB suffices, we should add the bitwise AND back
|
||||
m && ((req.address(config.maxCoalLogSize - 1, 0)/* & addrMask*/) === i.U)
|
||||
m && ((req.address(config.maxCoalLogSize - 1, config.wordWidth)/* & addrMask*/) === i.U)
|
||||
}
|
||||
// TODO: SW uses a priority encoder; confirm MuxCase matches it (per the Chisel docs, MuxCase selects the first condition that is true)
|
||||
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
|
||||
s -> req.data
|
||||
})
|
||||
mask(i) := Mux(i.U < numWords,
|
||||
MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) =>
|
||||
s -> req.mask
|
||||
}),
|
||||
0.U
|
||||
)
|
||||
mask(i) := MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) =>
|
||||
s -> req.mask
|
||||
})
|
||||
}
|
||||
|
||||
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
||||
@@ -528,15 +530,14 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
}
|
||||
|
||||
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
|
||||
// Make sure IdentityNode is connected to an upstream node, not just the
|
||||
// coalescer TL master node
|
||||
assert(outer.node.in.length >= 2)
|
||||
assert(outer.node.in(1)._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
|
||||
s"old source id bits TL param (${outer.node.in(1)._1.params.sourceBits}) mismatch with config")
|
||||
assert(outer.node.in(1)._1.params.addressBits == config.addressWidth,
|
||||
s"address width TL param (${outer.node.in(1)._1.params.addressBits}) mismatch with config")
|
||||
assert(outer.cpuNode.in.length == config.numLanes,
|
||||
s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as number of lanes")
|
||||
assert(outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
|
||||
s"old source id bits TL param (${outer.cpuNode.in.head._1.params.sourceBits}) mismatch with config")
|
||||
assert(outer.cpuNode.in.head._1.params.addressBits == config.addressWidth,
|
||||
s"address width TL param (${outer.cpuNode.in.head._1.params.addressBits}) mismatch with config")
|
||||
|
||||
val sourceWidth = outer.node.in(1)._1.params.sourceBits
|
||||
val sourceWidth = outer.cpuNode.in.head._1.params.sourceBits
|
||||
// Note: we use the word size here, assuming all coalescer inputs are word-sized.
|
||||
val reqQueueEntryT = new ReqQueueEntry(sourceWidth, config.wordWidth, config.addressWidth, config.wordSizeInBytes)
|
||||
val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config))
|
||||
@@ -553,19 +554,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
// Override IdentityNode implementation so that we can instantiate
|
||||
// queues between input and output edges to buffer requests and responses.
|
||||
// See IdentityNode definition in `diplomacy/Nodes.scala`.
|
||||
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
|
||||
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
|
||||
assert(
|
||||
edgeIn.master.masters(0).name == "CoalescerNode",
|
||||
"First edge is not connected to the coalescer master node"
|
||||
)
|
||||
// Edge from the coalescer TL master node should simply bypass the identity node,
|
||||
// except for connecting the outgoing edge to the inflight table, which is done
|
||||
// down below.
|
||||
tlOut.a <> tlIn.a
|
||||
case (((tlIn, _), (tlOut, edgeOut)), i) =>
|
||||
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
|
||||
case (((tlIn, _), (tlOut, edgeOut)), lane) =>
|
||||
// Request queue
|
||||
val lane = i - 1
|
||||
val req = Wire(reqQueueEntryT)
|
||||
|
||||
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
|
||||
@@ -592,7 +583,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
tlOut.a.bits := deq.bits.toTLA(edgeOut)
|
||||
}
|
||||
|
||||
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
||||
val (tlCoal, edgeCoal) = outer.coalescerNode.out.head
|
||||
|
||||
tlCoal.a.valid := coalescer.io.coalReq.valid
|
||||
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
|
||||
@@ -643,22 +634,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
val respQueueNoncoalPort = 0
|
||||
val respQueueUncoalPortOffset = 1
|
||||
|
||||
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
|
||||
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
|
||||
assert(
|
||||
edgeIn.master.masters(0).name == "CoalescerNode",
|
||||
"First edge is not connected to the coalescer master node"
|
||||
)
|
||||
// Edge from the coalescer TL master node should simply bypass the identity node,
|
||||
// except for connecting the outgoing edge to the inflight table, which is done
|
||||
// down below.
|
||||
tlIn.d <> tlOut.d
|
||||
case (((tlIn, edgeIn), (tlOut, _)), i) =>
|
||||
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
|
||||
case (((tlIn, edgeIn), (tlOut, _)), lane) =>
|
||||
// Response queue
|
||||
//
|
||||
// This queue will serialize non-coalesced responses along with
|
||||
// coalesced responses and serve them back to the core side.
|
||||
val lane = i - 1
|
||||
val respQueue = respQueues(lane)
|
||||
val resp = Wire(respQueueEntryT)
|
||||
resp.fromTLD(tlOut.d.bits)
|
||||
@@ -1564,8 +1545,8 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule {
|
||||
|
||||
val coal = LazyModule(new CoalescingUnit(defaultConfig))
|
||||
|
||||
coal.node :=* driver.node
|
||||
rams.foreach(_.node := coal.node)
|
||||
coal.cpuNode :=* driver.node
|
||||
rams.foreach(_.node := coal.aggregateNode)
|
||||
|
||||
lazy val module = new Impl
|
||||
class Impl extends LazyModuleImp(this) with UnitTestModule {
|
||||
@@ -1604,7 +1585,8 @@ class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
|
||||
)
|
||||
)
|
||||
|
||||
memSideLogger.node :=* coal.node :=* coreSideLogger.node :=* driver.node
|
||||
memSideLogger.node :=* coal.aggregateNode
|
||||
coal.cpuNode :=* coreSideLogger.node :=* driver.node
|
||||
rams.foreach { r => r.node := memSideLogger.node }
|
||||
|
||||
lazy val module = new Impl
|
||||
@@ -1653,8 +1635,8 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
|
||||
)
|
||||
)
|
||||
|
||||
coal.node :=* driver.node
|
||||
rams.foreach { r => r.node := coal.node }
|
||||
coal.cpuNode :=* driver.node
|
||||
rams.foreach { r => r.node := coal.aggregateNode }
|
||||
|
||||
lazy val module = new Impl
|
||||
class Impl extends LazyModuleImp(this) with UnitTestModule {
|
||||
|
||||
@@ -7,6 +7,7 @@ import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util.MultiPortQueue
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import chipsalliance.rocketchip.config.Parameters
|
||||
import chisel3.util.{DecoupledIO, Valid}
|
||||
import chisel3.util.experimental.BoringUtils
|
||||
|
||||
class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
@@ -80,11 +81,8 @@ class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
||||
|
||||
val dut = LazyModule(new CoalescingUnit(testConfig))
|
||||
|
||||
val widthWidgets = Seq.tabulate(4) { _ => TLWidthWidget(4)}
|
||||
(cpuNodes zip widthWidgets).foreach { case (cpuNode, widthWidget) => widthWidget := cpuNode}
|
||||
|
||||
widthWidgets.foreach(dut.node := _)
|
||||
l2Nodes.foreach(_ := dut.node)
|
||||
cpuNodes.foreach(dut.cpuNode := _)
|
||||
l2Nodes.foreach(_ := dut.aggregateNode)
|
||||
|
||||
lazy val module = new DummyCoalescingUnitTBImp(this)
|
||||
}
|
||||
@@ -99,6 +97,8 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
val coalIO3 = outer.cpuNodes(3).makeIOs()
|
||||
val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3)
|
||||
|
||||
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
||||
|
||||
private val reqQueues = coal.module.reqQueues
|
||||
private val coalescer = coal.module.coalescer
|
||||
|
||||
@@ -107,23 +107,30 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
|
||||
private val peekIn = Seq(
|
||||
reqQueues.io.queue.enq.map(_.ready),
|
||||
reqQueues.io.queue.enq.map(_.bits),
|
||||
reqQueues.io.queue.enq.map(_.valid),
|
||||
reqQueues.io.queue.deq.map(_.bits),
|
||||
reqQueues.io.queue.deq.map(_.valid),
|
||||
coalescer.io.coalReq.ready,
|
||||
coalescer.io.coalReq.bits,
|
||||
coalescer.io.coalReq.valid,
|
||||
coalescer.io.invalidate,
|
||||
)
|
||||
|
||||
val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
|
||||
val reqQueueDeqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType)))
|
||||
val reqQueueDeqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
|
||||
val coalReqReady = IO(Output(peekIn(3).asInstanceOf[Bool].cloneType))
|
||||
val coalReqBits = IO(Output(peekIn(4).asInstanceOf[ReqQueueEntry].cloneType))
|
||||
val coalReqValid = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType))
|
||||
val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
|
||||
val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(x.cloneType))
|
||||
val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
|
||||
val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType)))
|
||||
val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
|
||||
val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType))
|
||||
val coalReqBits = IO(Output(peekIn(6).asInstanceOf[ReqQueueEntry].cloneType))
|
||||
val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType))
|
||||
val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType))
|
||||
|
||||
private val peekOut = Seq(
|
||||
reqQueueEnqReady, reqQueueDeqBits, reqQueueDeqValid,
|
||||
coalReqReady, coalReqBits, coalReqValid,
|
||||
reqQueueEnqReady, reqQueueEnqBits, reqQueueEnqValid,
|
||||
reqQueueDeqBits, reqQueueDeqValid,
|
||||
coalReqReady, coalReqBits, coalReqValid, coalInvalidate,
|
||||
)
|
||||
|
||||
(peekIn zip peekOut).foreach {
|
||||
@@ -142,14 +149,13 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
// coalescer.io.coalReq.ready
|
||||
)
|
||||
|
||||
val reqQueueDeqReady = pokeIn(0).asInstanceOf[Seq[Bool]].map(x => IO(Input(x.cloneType)))
|
||||
val reqQueueDeqReady = pokeIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
|
||||
|
||||
private val pokeOut = Seq(
|
||||
reqQueueDeqReady
|
||||
)
|
||||
|
||||
// TODO: doesn't work yet
|
||||
/*
|
||||
(pokeIn zip pokeOut).foreach {
|
||||
case (inner: IndexedSeq[Data], outer: Seq[Data]) =>
|
||||
(inner zip outer).foreach { case (i, o) =>
|
||||
@@ -159,9 +165,7 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
BoringUtils.bore(inner, Seq(outer))
|
||||
case _ =>
|
||||
assert(false, "boring between different data types")
|
||||
}*/
|
||||
|
||||
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
||||
}
|
||||
}
|
||||
|
||||
object testConfig extends CoalescerConfig(
|
||||
@@ -176,7 +180,7 @@ object testConfig extends CoalescerConfig(
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalLogSizes = Seq(3),
|
||||
coalLogSizes = Seq(4, 5),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum,
|
||||
arbiterOutputs = 4
|
||||
)
|
||||
@@ -188,13 +192,8 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
def pokeA(
|
||||
nodes: Seq[TLBundle],
|
||||
idx: Int,
|
||||
op: Int,
|
||||
size: Int,
|
||||
source: Int,
|
||||
addr: Int,
|
||||
mask: Int,
|
||||
data: Int
|
||||
idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int,
|
||||
valid: Boolean = true,
|
||||
): Unit = {
|
||||
val node = nodes(idx)
|
||||
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||
@@ -206,7 +205,7 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
node.a.bits.mask.poke(mask.U)
|
||||
node.a.bits.data.poke(data.U)
|
||||
node.a.bits.corrupt.poke(false.B)
|
||||
node.a.valid.poke(true.B)
|
||||
node.a.valid.poke(valid.B)
|
||||
}
|
||||
|
||||
def unsetA(nodes: Seq[TLBundle]): Unit = {
|
||||
@@ -215,31 +214,56 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||
test(LazyModule(new DummyCoalescingUnitTB()).module)
|
||||
.withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
|
||||
{ c =>
|
||||
println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||
val nodes = c.coalIOs.map(_.head)
|
||||
// Element-wise check of two sequences: calls `expect` on each element of `vec`,
// passing the element of `value` at the same position (pairs are formed with `zip`).
// NOTE(review): `zip` drops unmatched trailing elements, so a length mismatch
// between `vec` and `value` is not detected here.
def expectVec[T <: Data](vec: Seq[T], value: Seq[T]): Unit = {
|
||||
(vec zip value).foreach { case (a, b) => a.expect(b) }
|
||||
}
|
||||
|
||||
c.reqQueueEnqReady.foreach(_.expect(true.B))
|
||||
it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||
test(LazyModule(new DummyCoalescingUnitTB()).module)
|
||||
.withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
|
||||
{ c =>
|
||||
println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||
val nodes = c.coalIOs.map(_.head)
|
||||
// TODO: this doesn't work
|
||||
// c.coalMasterNode.head.a.ready.poke(true.B)
|
||||
|
||||
// always ready to take non-coalesced requests
|
||||
c.reqQueueDeqReady.foreach(_.poke(true.B))
|
||||
c.reqQueueEnqReady.foreach(_.expect(true.B))
|
||||
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
|
||||
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
|
||||
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
|
||||
expectVec(c.reqQueueEnqBits.map(_.data), Seq(0x1111.U, 0x2222.U, 0x3333.U, 0x4444.U))
|
||||
c.clock.step()
|
||||
|
||||
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
|
||||
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
|
||||
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
|
||||
unsetA(nodes)
|
||||
c.reqQueueDeqValid.foreach(_.expect(false.B))
|
||||
|
||||
c.clock.step()
|
||||
c.coalReqValid.expect(true.B)
|
||||
c.coalReqBits.address.expect(0x10.U)
|
||||
c.coalReqBits.data.expect(BigInt("4444000033330000222200001111", 16) << 128)
|
||||
c.coalReqBits.mask.expect(0xffff0000L)
|
||||
c.coalReqBits.size.expect(4.U)
|
||||
c.coalReqBits.op.expect(1.U)
|
||||
|
||||
unsetA(nodes)
|
||||
// c.coalReqReady.expect(true.B)
|
||||
c.reqQueueEnqReady.foreach(_.expect(true.B))
|
||||
pokeA(nodes, idx = 0, op = 1, size = 2, source = 1, addr = 0xf20, mask = 0xf, data = 0x5555)
|
||||
pokeA(nodes, idx = 1, op = 1, size = 2, source = 1, addr = 0xf24, mask = 0xf, data = 0x6666, valid = false)
|
||||
pokeA(nodes, idx = 2, op = 1, size = 2, source = 1, addr = 0xf28, mask = 0xf, data = 0x7777)
|
||||
pokeA(nodes, idx = 3, op = 1, size = 2, source = 1, addr = 0xf2c, mask = 0xf, data = 0x8888, valid = false)
|
||||
c.clock.step()
|
||||
|
||||
c.clock.step()
|
||||
c.clock.step()
|
||||
}
|
||||
}
|
||||
c.coalReqValid.expect(true.B)
|
||||
c.coalReqBits.address.expect(0xf20.U)
|
||||
c.coalReqBits.data.expect(BigInt("77770000000000005555", 16)) // technically these can be dontcare's
|
||||
c.coalReqBits.mask.expect(0x0000ffff)
|
||||
c.coalReqBits.size.expect(4.U)
|
||||
c.coalReqBits.op.expect(1.U)
|
||||
|
||||
c.clock.step()
|
||||
c.clock.step()
|
||||
}
|
||||
}
|
||||
|
||||
it should "coalesce identical addresses (stride of 0)" in {
|
||||
test(LazyModule(new DummyCoalescingUnitTB()).module)
|
||||
|
||||
Reference in New Issue
Block a user