More comments & renames

This commit is contained in:
Hansung Kim
2023-04-27 19:17:07 -07:00
parent 900f5adb20
commit 7780250c7a
2 changed files with 171 additions and 122 deletions

View File

@@ -50,8 +50,8 @@ case class CoalescerConfig(
numOldSrcIds: Int, // num of outstanding requests per lane, from processor numOldSrcIds: Int, // num of outstanding requests per lane, from processor
numNewSrcIds: Int, // num of outstanding coalesced requests numNewSrcIds: Int, // num of outstanding coalesced requests
respQueueDepth: Int, // depth of the response fifo queues respQueueDepth: Int, // depth of the response fifo queues
coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
// must be power of 2's // each size is log(byteSize)
sizeEnum: InFlightTableSizeEnum sizeEnum: InFlightTableSizeEnum
) )
@@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig(
numOldSrcIds = 16, numOldSrcIds = 16,
numNewSrcIds = 4, numNewSrcIds = 4,
respQueueDepth = 4, respQueueDepth = 4,
coalSizes = Seq(3), coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum sizeEnum = DefaultInFlightTableSizeEnum
) )
@@ -243,7 +243,7 @@ class CoalShiftQueue[T <: Data](
} }
// Software model: coalescer.py // Software model: coalescer.py
class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
config: CoalescerConfig) extends Module { config: CoalescerConfig) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val window = Input(Vec(config.numLanes, windowT.io.cloneType)) val window = Input(Vec(config.numLanes, windowT.io.cloneType))
@@ -251,6 +251,8 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
val baseAddr = Output(UInt(config.addressWidth.W)) val baseAddr = Output(UInt(config.addressWidth.W))
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W))) val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
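// number of entries matched with this leader lane's head.
// maximum is numLanes * queueDepth
// NOTE(review): a width of log2Ceil(numLanes * queueDepth) bits can only hold
// values up to numLanes * queueDepth - 1 when that product is a power of two;
// confirm whether the maximum count can occur, and if so widen to
// log2Ceil(numLanes * queueDepth + 1).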
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W)) val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W))
val coverageHits = Output(UInt((1 << config.maxSize).W)) val coverageHits = Output(UInt((1 << config.maxSize).W))
}) })
@@ -284,7 +286,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
printQueueHeads printQueueHeads
} }
val size = coalSize val size = coalLogSize
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = { def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
(req0.op === req1.op) && (req0.op === req1.op) &&
@@ -323,6 +325,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
})(chosenLeaderIdx) })(chosenLeaderIdx)
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx) val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
// coverage calculation
// Selects address bits [size - 1 : config.wordWidth], i.e. the bits that sit
// below this coalescer's log size and at or above the word-width boundary.
// NOTE(review): this slice is empty/invalid if size <= config.wordWidth --
// confirm that configured coalescer sizes are always larger than wordWidth.
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
// 2-D table flattened to 1-D
// (every entry of every lane's window, reduced to its offset slice)
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
// Per-lane mask bits flattened into one Bool sequence; paired positionally
// with `offsets` by the zip below.
// NOTE(review): assumes mask bit i corresponds to elts(i) -- confirm against
// CoalShiftQueue.
val valids = io.window.map(_.mask).flatMap(_.asBools)
// One Bool per possible offset value: 1 << (size - config.wordWidth) entries.
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
// true if any valid queue entry accesses the given offset word of the
// coalesced chunk (an OR-reduction, not a count); if true for all offsets,
// we've reached 100% utilization of the coalesced data words
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
}
// debug prints // debug prints
when (leadersValid.reduce(_ || _)) { when (leadersValid.reduce(_ || _)) {
matchCounts.zipWithIndex.foreach { case (count, i) => matchCounts.zipWithIndex.foreach { case (count, i) =>
@@ -334,14 +348,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
printf("%d ", m) printf("%d ", m)
} }
printf("]\n") printf("]\n")
}
// coverage calculation printf("hits = [ ")
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth) hits.foreach { m =>
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address))) printf("%d ", m)
val valids = io.window.map(_.mask).flatMap(_.asBools) }
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target => printf("]\n")
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
} }
io.results.leaderIdx := chosenLeaderIdx io.results.leaderIdx := chosenLeaderIdx
@@ -356,16 +368,19 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
config: CoalescerConfig) extends Module { config: CoalescerConfig) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
// coalescing window, connected to the contents of the request queues
val window = Input(Vec(config.numLanes, windowT.io.cloneType)) val window = Input(Vec(config.numLanes, windowT.io.cloneType))
// newly generated coalesced request
val outReq = DecoupledIO(coalReqT.cloneType) val outReq = DecoupledIO(coalReqT.cloneType)
// invalidate signals going into each request queue's head
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
}) })
val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
coalescers.foreach(_.io.window := io.window) coalescers.foreach(_.io.window := io.window)
def normalize(x: Seq[UInt]): Seq[UInt] = { def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
x.zip(config.coalSizes).map { case (hits, size) => (valPerSize zip config.coalLogSizes).map { case (hits, size) =>
(hits << (config.maxSize - size).U).asUInt (hits << (config.maxSize - size).U).asUInt
} }
} }
@@ -378,27 +393,34 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
}._2 }._2
} }
// normalize to maximum coalescing size so that we can do fair comparisons
// between coalescing results of different sizes
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount)) val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits)) val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W)) val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
val chosenValid = Wire(Bool()) val chosenValid = Wire(Bool())
// minimum 25% coverage // minimum 25% coverage
val minCoverage = 1.max(1 << (config.maxSize - 4)) val minCoverage = 1.max(1 << ((config.maxSize - 2) - 2))
printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount)
printf("normalizedMatches[0]=%d\n", normalizedMatches(0))
printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits)
printf("normalizedHits[0]=%d\n", normalizedHits(0))
printf("minCoverage=%d\n", minCoverage.U)
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
chosenIdx := argMax(normalizedHits) chosenSizeIdx := argMax(normalizedHits)
chosenValid := true.B chosenValid := true.B
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) { }.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
chosenIdx := argMax(normalizedMatches) chosenSizeIdx := argMax(normalizedMatches)
chosenValid := true.B chosenValid := true.B
}.otherwise { }.otherwise {
chosenIdx := DontCare chosenSizeIdx := DontCare
chosenValid := false.B chosenValid := false.B
} }
// create coalesced request // create coalesced request
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx) val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx)
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx) val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
// flatten requests and matches // flatten requests and matches
val flatReqs = io.window.flatMap(_.elts) val flatReqs = io.window.flatMap(_.elts)
@@ -437,13 +459,18 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds))) val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
val coalesceValid = chosenValid && sourceGen.io.id.valid
when (coalesceValid) {
printf("coalescing success!\n")
}
io.outReq.bits.source := sourceGen.io.id.bits io.outReq.bits.source := sourceGen.io.id.bits
io.outReq.bits.mask := mask.asUInt io.outReq.bits.mask := mask.asUInt
io.outReq.bits.data := data.asUInt io.outReq.bits.data := data.asUInt
io.outReq.bits.size := chosenSize io.outReq.bits.size := chosenSize
io.outReq.bits.address := chosenBundle.baseAddr io.outReq.bits.address := chosenBundle.baseAddr
io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
io.outReq.valid := chosenValid && sourceGen.io.id.valid io.outReq.valid := coalesceValid
io.invalidate.bits := chosenBundle.matchOH io.invalidate.bits := chosenBundle.matchOH
io.invalidate.valid := io.outReq.fire // invalidate only when fire io.invalidate.valid := io.outReq.fire // invalidate only when fire

View File

@@ -35,17 +35,30 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule { class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ => val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters( TLClientNode(
Seq(
TLMasterPortParameters.v1(
Seq(
TLClientParameters(
name = "processor-nodes", name = "processor-nodes",
sourceId = IdRange(0, testConfig.numOldSrcIds), sourceId = IdRange(0, testConfig.numOldSrcIds),
requestFifo = true, requestFifo = true,
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig) visibility = Seq(AddressSet(0x0, 0xffffff))
)
)
)
)
) // 24 bit address space (TODO probably use testConfig)
} }
val device = new SimpleDevice("dummy", Seq("dummy")) val device = new SimpleDevice("dummy", Seq("dummy"))
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
val l2Nodes = Seq.tabulate(5) { _ => val l2Nodes = Seq.tabulate(5) { _ =>
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters( TLManagerNode(
Seq(
TLSlavePortParameters.v1(
Seq(
TLManagerParameters(
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
resources = device.reg, resources = device.reg,
regionType = RegionType.UNCACHED, regionType = RegionType.UNCACHED,
@@ -56,7 +69,13 @@ class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
supportsPutFull = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes),
supportsPutPartial = TransferSizes(1, beatBytes), supportsPutPartial = TransferSizes(1, beatBytes),
supportsHint = TransferSizes(1, beatBytes), supportsHint = TransferSizes(1, beatBytes),
fifoId = Some(0))), beatBytes))) fifoId = Some(0)
)
),
beatBytes
)
)
)
} }
val dut = LazyModule(new CoalescingUnit(testConfig)) val dut = LazyModule(new CoalescingUnit(testConfig))
@@ -80,7 +99,6 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "multi- and mono-coalescers" behavior of "multi- and mono-coalescers"
it should "coalesce fully consecutive accesses at size 4, only once" in {
implicit val p: Parameters = Parameters.empty implicit val p: Parameters = Parameters.empty
val tb = LazyModule(new DummyCoalescingUnitTB()) val tb = LazyModule(new DummyCoalescingUnitTB())
@@ -90,13 +108,16 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
tb.cpuNodes.foreach(coal.node := _) tb.cpuNodes.foreach(coal.node := _)
tb.l2Nodes.foreach(_ := coal.node) tb.l2Nodes.foreach(_ := coal.node)
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c => def pokeA(
val nodes = c.coalIOs.map(_.head) nodes: Seq[TLBundle],
// val nodes = c.cpuNodesImp.map(_.out.head._1) idx: Int,
// val nodes = c.coal.node.in.map(_._1) op: Int,
// val nodes = c.mitmNodesImp.map(_.in.head._1) size: Int,
source: Int,
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = { addr: Int,
mask: Int,
data: Int
): Unit = {
val node = nodes(idx) val node = nodes(idx)
// node.a.ready.expect(true.B) // FIXME: this fails currently // node.a.ready.expect(true.B) // FIXME: this fails currently
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get) node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
@@ -110,12 +131,22 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
node.a.valid.poke(true.B) node.a.valid.poke(true.B)
} }
def unsetA(): Unit = { def unsetA(nodes: Seq[TLBundle]): Unit = {
nodes.foreach { node => nodes.foreach { node =>
node.a.valid.poke(false.B) node.a.valid.poke(false.B)
} }
} }
it should "coalesce fully consecutive accesses at size 4, only once" in {
test(tb.module)
// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
{ c =>
println(s"coalIO length = ${c.coalIOs(0).length}")
val nodes = c.coalIOs.map(_.head)
// val nodes = c.cpuNodesImp.map(_.out.head._1)
// val nodes = c.coal.node.in.map(_._1)
// val nodes = c.mitmNodesImp.map(_.in.head._1)
// always ready to take coalesced requests // always ready to take coalesced requests
// c.coalMasterNode.head.a.ready.poke(true.B) // c.coalMasterNode.head.a.ready.poke(true.B)
// c.coal.module.coalescer.io.outReq.ready.poke(true.B) // c.coal.module.coalescer.io.outReq.ready.poke(true.B)
@@ -127,36 +158,26 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
c.clock.step() c.clock.step()
unsetA() unsetA(nodes)
c.clock.step() c.clock.step()
c.clock.step() c.clock.step()
} }
} }
it should "coalesce strided accesses at size 6" in { it should "coalesce identical addresses (stride of 0)" in {}
} it should "coalesce strided accesses at size 6" in {}
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in { it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {}
} it should "not touch uncoalescable requests" in {}
it should "not touch uncoalescable requests" in { it should "allow temporal coalescing when depth >=2" in {}
} it should "select the most coverage mono-coalescer" in {}
it should "allow temporal coalescing when depth >=2" in { it should "resort to the backup policy when coverage is below average" in {}
}
it should "select the most coverage mono-coalescer" in {
}
it should "resort to the backup policy when coverage is below average" in {
}
} }
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
@@ -381,7 +402,8 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
} }
} }
object testConfig extends CoalescerConfig( object testConfig
extends CoalescerConfig(
maxSize = 5, maxSize = 5,
queueDepth = 2, queueDepth = 2,
waitTimeout = 8, waitTimeout = 8,
@@ -394,7 +416,7 @@ object testConfig extends CoalescerConfig(
numOldSrcIds = 16, numOldSrcIds = 16,
numNewSrcIds = 4, numNewSrcIds = 4,
respQueueDepth = 4, respQueueDepth = 4,
coalSizes = Seq(4, 5), coalLogSizes = Seq(4, 5),
sizeEnum = DefaultInFlightTableSizeEnum sizeEnum = DefaultInFlightTableSizeEnum
) )