More comments & renames
This commit is contained in:
@@ -36,22 +36,22 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
|
||||
}
|
||||
|
||||
case class CoalescerConfig(
|
||||
numLanes: Int, // number of lanes (or threads) in a warp
|
||||
maxSize: Int, // maximum burst size (64 bytes)
|
||||
queueDepth: Int, // request window per lane
|
||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||
addressWidth: Int, // assume <= 32
|
||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
||||
// this has to be larger than the word size for
|
||||
// the coalescer to perform well
|
||||
// watermark = 2, // minimum buffer occupancy to start coalescing
|
||||
wordSizeInBytes: Int, // 32-bit system
|
||||
wordWidth: Int, // log(WORD_SIZE)
|
||||
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
||||
numNewSrcIds: Int, // num of outstanding coalesced requests
|
||||
respQueueDepth: Int, // depth of the response fifo queues
|
||||
coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||
// must be power of 2's
|
||||
numLanes: Int, // number of lanes (or threads) in a warp
|
||||
maxSize: Int, // maximum burst size (64 bytes)
|
||||
queueDepth: Int, // request window per lane
|
||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||
addressWidth: Int, // assume <= 32
|
||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
||||
// this has to be larger than the word size for
|
||||
// the coalescer to perform well
|
||||
// watermark = 2, // minimum buffer occupancy to start coalescing
|
||||
wordSizeInBytes: Int, // 32-bit system
|
||||
wordWidth: Int, // log(WORD_SIZE)
|
||||
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
||||
numNewSrcIds: Int, // num of outstanding coalesced requests
|
||||
respQueueDepth: Int, // depth of the response fifo queues
|
||||
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||
// each size is log2(byteSize)
|
||||
sizeEnum: InFlightTableSizeEnum
|
||||
)
|
||||
|
||||
@@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig(
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalSizes = Seq(3),
|
||||
coalLogSizes = Seq(3),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
|
||||
@@ -243,7 +243,7 @@ class CoalShiftQueue[T <: Data](
|
||||
}
|
||||
|
||||
// Software model: coalescer.py
|
||||
class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
config: CoalescerConfig) extends Module {
|
||||
val io = IO(new Bundle {
|
||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||
@@ -251,6 +251,8 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
||||
val baseAddr = Output(UInt(config.addressWidth.W))
|
||||
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
|
||||
// number of entries matched with this leader lane's head.
|
||||
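// maximum is numLanes * queueDepth
// NOTE(review): a UInt of log2Ceil(numLanes * queueDepth) bits cannot hold
// that maximum when the product is a power of two (e.g. 4 * 2 = 8 needs
// 4 bits) -- confirm whether log2Ceil(numLanes * queueDepth + 1) is intended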
|
||||
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W))
|
||||
val coverageHits = Output(UInt((1 << config.maxSize).W))
|
||||
})
|
||||
@@ -284,7 +286,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
printQueueHeads
|
||||
}
|
||||
|
||||
val size = coalSize
|
||||
val size = coalLogSize
|
||||
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
||||
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
||||
(req0.op === req1.op) &&
|
||||
@@ -323,6 +325,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
})(chosenLeaderIdx)
|
||||
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
||||
|
||||
// coverage calculation
|
||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||
// 2-D table flattened to 1-D
|
||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||
// count if any of the queue entries accesses the given offset word of the
|
||||
// coalesced chunk; if 1 for all offsets, we've reached 100% utilization
|
||||
// of the coalesced data words
|
||||
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||
}
|
||||
|
||||
// debug prints
|
||||
when (leadersValid.reduce(_ || _)) {
|
||||
matchCounts.zipWithIndex.foreach { case (count, i) =>
|
||||
@@ -334,14 +348,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
printf("%d ", m)
|
||||
}
|
||||
printf("]\n")
|
||||
}
|
||||
|
||||
// coverage calculation
|
||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||
printf("hits = [ ")
|
||||
hits.foreach { m =>
|
||||
printf("%d ", m)
|
||||
}
|
||||
printf("]\n")
|
||||
}
|
||||
|
||||
io.results.leaderIdx := chosenLeaderIdx
|
||||
@@ -356,16 +368,19 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
config: CoalescerConfig) extends Module {
|
||||
|
||||
val io = IO(new Bundle {
|
||||
// coalescing window, connected to the contents of the request queues
|
||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||
// newly generated coalesced request
|
||||
val outReq = DecoupledIO(coalReqT.cloneType)
|
||||
// invalidate signals going into each request queue's head
|
||||
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
|
||||
})
|
||||
|
||||
val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
||||
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
||||
coalescers.foreach(_.io.window := io.window)
|
||||
|
||||
def normalize(x: Seq[UInt]): Seq[UInt] = {
|
||||
x.zip(config.coalSizes).map { case (hits, size) =>
|
||||
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
|
||||
(valPerSize zip config.coalLogSizes).map { case (hits, size) =>
|
||||
(hits << (config.maxSize - size).U).asUInt
|
||||
}
|
||||
}
|
||||
@@ -378,27 +393,34 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
}._2
|
||||
}
|
||||
|
||||
// normalize to maximum coalescing size so that we can do fair comparisons
|
||||
// between coalescing results of different sizes
|
||||
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
|
||||
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
|
||||
|
||||
val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W))
|
||||
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
|
||||
val chosenValid = Wire(Bool())
|
||||
// minimum 25% coverage
|
||||
val minCoverage = 1.max(1 << (config.maxSize - 4))
|
||||
val minCoverage = 1.max(1 << ((config.maxSize - 2) - 2))
|
||||
printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount)
|
||||
printf("normalizedMatches[0]=%d\n", normalizedMatches(0))
|
||||
printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits)
|
||||
printf("normalizedHits[0]=%d\n", normalizedHits(0))
|
||||
printf("minCoverage=%d\n", minCoverage.U)
|
||||
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
|
||||
chosenIdx := argMax(normalizedHits)
|
||||
chosenSizeIdx := argMax(normalizedHits)
|
||||
chosenValid := true.B
|
||||
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
|
||||
chosenIdx := argMax(normalizedMatches)
|
||||
chosenSizeIdx := argMax(normalizedMatches)
|
||||
chosenValid := true.B
|
||||
}.otherwise {
|
||||
chosenIdx := DontCare
|
||||
chosenSizeIdx := DontCare
|
||||
chosenValid := false.B
|
||||
}
|
||||
|
||||
// create coalesced request
|
||||
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx)
|
||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx)
|
||||
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx)
|
||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
|
||||
|
||||
// flatten requests and matches
|
||||
val flatReqs = io.window.flatMap(_.elts)
|
||||
@@ -437,13 +459,18 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
||||
sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
|
||||
|
||||
val coalesceValid = chosenValid && sourceGen.io.id.valid
|
||||
when (coalesceValid) {
|
||||
printf("coalescing success!\n")
|
||||
}
|
||||
|
||||
io.outReq.bits.source := sourceGen.io.id.bits
|
||||
io.outReq.bits.mask := mask.asUInt
|
||||
io.outReq.bits.data := data.asUInt
|
||||
io.outReq.bits.size := chosenSize
|
||||
io.outReq.bits.address := chosenBundle.baseAddr
|
||||
io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
|
||||
io.outReq.valid := chosenValid && sourceGen.io.id.valid
|
||||
io.outReq.valid := coalesceValid
|
||||
|
||||
io.invalidate.bits := chosenBundle.matchOH
|
||||
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
||||
|
||||
@@ -35,28 +35,47 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
||||
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
|
||||
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
|
||||
name = "processor-nodes",
|
||||
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||
requestFifo = true,
|
||||
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
|
||||
TLClientNode(
|
||||
Seq(
|
||||
TLMasterPortParameters.v1(
|
||||
Seq(
|
||||
TLClientParameters(
|
||||
name = "processor-nodes",
|
||||
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||
requestFifo = true,
|
||||
visibility = Seq(AddressSet(0x0, 0xffffff))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
) // 24 bit address space (TODO probably use testConfig)
|
||||
}
|
||||
|
||||
val device = new SimpleDevice("dummy", Seq("dummy"))
|
||||
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
|
||||
val l2Nodes = Seq.tabulate(5) { _ =>
|
||||
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
|
||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||
resources = device.reg,
|
||||
regionType = RegionType.UNCACHED,
|
||||
executable = true,
|
||||
supportsArithmetic = TransferSizes(1, beatBytes),
|
||||
supportsLogical = TransferSizes(1, beatBytes),
|
||||
supportsGet = TransferSizes(1, beatBytes),
|
||||
supportsPutFull = TransferSizes(1, beatBytes),
|
||||
supportsPutPartial = TransferSizes(1, beatBytes),
|
||||
supportsHint = TransferSizes(1, beatBytes),
|
||||
fifoId = Some(0))), beatBytes)))
|
||||
TLManagerNode(
|
||||
Seq(
|
||||
TLSlavePortParameters.v1(
|
||||
Seq(
|
||||
TLManagerParameters(
|
||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||
resources = device.reg,
|
||||
regionType = RegionType.UNCACHED,
|
||||
executable = true,
|
||||
supportsArithmetic = TransferSizes(1, beatBytes),
|
||||
supportsLogical = TransferSizes(1, beatBytes),
|
||||
supportsGet = TransferSizes(1, beatBytes),
|
||||
supportsPutFull = TransferSizes(1, beatBytes),
|
||||
supportsPutPartial = TransferSizes(1, beatBytes),
|
||||
supportsHint = TransferSizes(1, beatBytes),
|
||||
fifoId = Some(0)
|
||||
)
|
||||
),
|
||||
beatBytes
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
val dut = LazyModule(new CoalescingUnit(testConfig))
|
||||
@@ -80,83 +99,85 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "multi- and mono-coalescers"
|
||||
|
||||
implicit val p: Parameters = Parameters.empty
|
||||
|
||||
val tb = LazyModule(new DummyCoalescingUnitTB())
|
||||
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
||||
|
||||
val coal = tb.dut
|
||||
tb.cpuNodes.foreach(coal.node := _)
|
||||
tb.l2Nodes.foreach(_ := coal.node)
|
||||
|
||||
def pokeA(
|
||||
nodes: Seq[TLBundle],
|
||||
idx: Int,
|
||||
op: Int,
|
||||
size: Int,
|
||||
source: Int,
|
||||
addr: Int,
|
||||
mask: Int,
|
||||
data: Int
|
||||
): Unit = {
|
||||
val node = nodes(idx)
|
||||
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||
node.a.bits.param.poke(0.U)
|
||||
node.a.bits.size.poke(size.U)
|
||||
node.a.bits.source.poke(source.U)
|
||||
node.a.bits.address.poke(addr.U)
|
||||
node.a.bits.mask.poke(mask.U)
|
||||
node.a.bits.data.poke(data.U)
|
||||
node.a.bits.corrupt.poke(false.B)
|
||||
node.a.valid.poke(true.B)
|
||||
}
|
||||
|
||||
def unsetA(nodes: Seq[TLBundle]): Unit = {
|
||||
nodes.foreach { node =>
|
||||
node.a.valid.poke(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||
implicit val p: Parameters = Parameters.empty
|
||||
|
||||
val tb = LazyModule(new DummyCoalescingUnitTB())
|
||||
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
||||
|
||||
val coal = tb.dut
|
||||
tb.cpuNodes.foreach(coal.node := _)
|
||||
tb.l2Nodes.foreach(_ := coal.node)
|
||||
|
||||
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c =>
|
||||
test(tb.module)
|
||||
// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
|
||||
{ c =>
|
||||
println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||
val nodes = c.coalIOs.map(_.head)
|
||||
// val nodes = c.cpuNodesImp.map(_.out.head._1)
|
||||
// val nodes = c.coal.node.in.map(_._1)
|
||||
// val nodes = c.mitmNodesImp.map(_.in.head._1)
|
||||
|
||||
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
|
||||
val node = nodes(idx)
|
||||
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||
node.a.bits.param.poke(0.U)
|
||||
node.a.bits.size.poke(size.U)
|
||||
node.a.bits.source.poke(source.U)
|
||||
node.a.bits.address.poke(addr.U)
|
||||
node.a.bits.mask.poke(mask.U)
|
||||
node.a.bits.data.poke(data.U)
|
||||
node.a.bits.corrupt.poke(false.B)
|
||||
node.a.valid.poke(true.B)
|
||||
}
|
||||
|
||||
def unsetA(): Unit = {
|
||||
nodes.foreach { node =>
|
||||
node.a.valid.poke(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
// always ready to take coalesced requests
|
||||
// c.coalMasterNode.head.a.ready.poke(true.B)
|
||||
// c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
||||
|
||||
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
|
||||
pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222)
|
||||
pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333)
|
||||
pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444)
|
||||
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
|
||||
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
|
||||
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
|
||||
|
||||
c.clock.step()
|
||||
|
||||
unsetA()
|
||||
unsetA(nodes)
|
||||
|
||||
c.clock.step()
|
||||
c.clock.step()
|
||||
}
|
||||
}
|
||||
|
||||
it should "coalesce strided accesses at size 6" in {
|
||||
it should "coalesce identical addresses (stride of 0)" in {}
|
||||
|
||||
}
|
||||
it should "coalesce strided accesses at size 6" in {}
|
||||
|
||||
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {
|
||||
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {}
|
||||
|
||||
}
|
||||
it should "not touch uncoalescable requests" in {}
|
||||
|
||||
it should "not touch uncoalescable requests" in {
|
||||
it should "allow temporal coalescing when depth >=2" in {}
|
||||
|
||||
}
|
||||
it should "select the most coverage mono-coalescer" in {}
|
||||
|
||||
it should "allow temporal coalescing when depth >=2" in {
|
||||
|
||||
}
|
||||
|
||||
it should "select the most coverage mono-coalescer" in {
|
||||
|
||||
}
|
||||
|
||||
it should "resort to the backup policy when coverage is below average" in {
|
||||
|
||||
}
|
||||
it should "resort to the backup policy when coverage is below average" in {}
|
||||
}
|
||||
|
||||
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
@@ -381,22 +402,23 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
object testConfig extends CoalescerConfig(
|
||||
maxSize = 5,
|
||||
queueDepth = 2,
|
||||
waitTimeout = 8,
|
||||
addressWidth = 24,
|
||||
dataBusWidth = 5,
|
||||
numLanes = 4,
|
||||
// watermark = 2,
|
||||
wordSizeInBytes = 4,
|
||||
wordWidth = 2,
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalSizes = Seq(4, 5),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
object testConfig
|
||||
extends CoalescerConfig(
|
||||
maxSize = 5,
|
||||
queueDepth = 2,
|
||||
waitTimeout = 8,
|
||||
addressWidth = 24,
|
||||
dataBusWidth = 5,
|
||||
numLanes = 4,
|
||||
// watermark = 2,
|
||||
wordSizeInBytes = 4,
|
||||
wordWidth = 2,
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalLogSizes = Seq(4, 5),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
|
||||
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "uncoalescer"
|
||||
|
||||
Reference in New Issue
Block a user