running coalescing flow
This commit is contained in:
@@ -270,12 +270,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
|
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
|
||||||
// requests with full source info even when the corresponding lane is not
|
// requests with full source info even when the corresponding lane is not
|
||||||
// active.
|
// active.
|
||||||
def testNoQueueDrift = {
|
// True only when every queue head carries the same source ID as the first head.
// This must be an AND-reduce: the first head always equals itself, so an
// OR-reduce would be constantly true and could never report drift.
def testNoQueueDrift: Bool = leaders.map(_.source === leaders.head.source).reduce(_ && _)
|
||||||
leaders.map((_, true.B))
|
|
||||||
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) =>
|
|
||||||
(h1, Mux(m0, (h0.source === h1.source), false.B))
|
|
||||||
}._2
|
|
||||||
}
|
|
||||||
def printQueueHeads = {
|
def printQueueHeads = {
|
||||||
leaders.zipWithIndex.foreach{ case (head, i) =>
|
leaders.zipWithIndex.foreach{ case (head, i) =>
|
||||||
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
|
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
|
||||||
@@ -309,19 +304,8 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: potentially expensive: popcount & adder & greater-than comparator
|
// TODO: potentially expensive: popcount & adder
|
||||||
val matchCounts = matchTablePerLane.map(table => table.map( PopCount(_) )
|
val matchCounts = matchTablePerLane.map(leader => leader.map(PopCount(_)).reduce(_ +& _))
|
||||||
.reduce{ (m0, m1) =>
|
|
||||||
// this is clunky; what's a good way to extend a UInt's bit width?
|
|
||||||
val countWidth = 5 // for 32 lanes, has to be at least 5
|
|
||||||
val m0u = Wire(UInt(countWidth.W))
|
|
||||||
val m1u = Wire(UInt(countWidth.W))
|
|
||||||
m0u := m0
|
|
||||||
m1u := m1
|
|
||||||
m0u + m1u
|
|
||||||
})
|
|
||||||
// NOTE: be careful to not have matchCount result to be 1-bit wide
|
|
||||||
assert(matchCounts(0).getWidth > 0)
|
|
||||||
val canCoalesce = matchCounts.map(_ > 1.U)
|
val canCoalesce = matchCounts.map(_ > 1.U)
|
||||||
|
|
||||||
// TODO: potentially expensive
|
// TODO: potentially expensive
|
||||||
@@ -329,7 +313,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
val chosenLeaderIdx = matchCounts.zipWithIndex.map {
|
val chosenLeaderIdx = matchCounts.zipWithIndex.map {
|
||||||
case (c, i) => (c, i.U)
|
case (c, i) => (c, i.U)
|
||||||
}.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) =>
|
}.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) =>
|
||||||
(Mux(c0 > c1, c0, c1), Mux(c0 > c1, i, j))
|
(Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j))
|
||||||
}._2
|
}._2
|
||||||
|
|
||||||
val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
|
val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
|
||||||
@@ -355,8 +339,9 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
// coverage calculation
|
// coverage calculation
|
||||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||||
|
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||||
offsets.map(_ === target.U).reduce(_ || _)
|
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||||
}
|
}
|
||||||
|
|
||||||
io.results.leaderIdx := chosenLeaderIdx
|
io.results.leaderIdx := chosenLeaderIdx
|
||||||
@@ -420,7 +405,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
|
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
|
||||||
|
|
||||||
// check for word alignment in addresses
|
// check for word alignment in addresses
|
||||||
assert(io.window.flatMap(_.elts.map(req => req.address(config.wordWidth - 1, 0) === 0.U)).reduce(_ || _),
|
assert(io.window.flatMap(_.elts.map(req => req.address(config.wordWidth - 1, 0) === 0.U)).zip(
|
||||||
|
io.window.flatMap(_.mask.asBools)).map { case (aligned, valid) => (!valid) || aligned }.reduce(_ && _), // every valid entry must be word-aligned, hence AND (OR would pass if any single entry were fine)
|
||||||
"one or more addresses used for coalescing is not word-aligned")
|
"one or more addresses used for coalescing is not word-aligned")
|
||||||
|
|
||||||
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
|
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
|
||||||
@@ -462,6 +448,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
io.invalidate.bits := chosenBundle.matchOH
|
io.invalidate.bits := chosenBundle.matchOH
|
||||||
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
||||||
|
|
||||||
|
dontTouch(io.invalidate) // debug
|
||||||
|
|
||||||
// uncomment the following lines to disable coalescing entirely
|
// uncomment the following lines to disable coalescing entirely
|
||||||
// io.outReq.valid := false.B
|
// io.outReq.valid := false.B
|
||||||
// io.invalidate.valid := false.B
|
// io.invalidate.valid := false.B
|
||||||
|
|||||||
@@ -34,19 +34,16 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
||||||
val cpuNodes = Seq.tabulate(testConfig.NUM_LANES) { _ =>
|
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
|
||||||
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
|
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
|
||||||
name = "processor-nodes",
|
name = "processor-nodes",
|
||||||
sourceId = IdRange(0, testConfig.NUM_OLD_IDS),
|
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||||
requestFifo = true,
|
requestFifo = true,
|
||||||
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
|
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: this mitm is part of a desperate effort, can remove now & reconnect
|
|
||||||
val mitm = Seq.tabulate(testConfig.NUM_LANES) {_ => TLIdentityNode()}
|
|
||||||
|
|
||||||
val device = new SimpleDevice("dummy", Seq("dummy"))
|
val device = new SimpleDevice("dummy", Seq("dummy"))
|
||||||
val beatBytes = 1 << testConfig.DATA_BUS_SIZE // 256 bit bus
|
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
|
||||||
val l2Nodes = Seq.tabulate(5) { _ =>
|
val l2Nodes = Seq.tabulate(5) { _ =>
|
||||||
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
|
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
|
||||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||||
@@ -68,7 +65,6 @@ class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleImp(outer) {
|
class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleImp(outer) {
|
||||||
val mitmNodesImp = outer.mitm
|
|
||||||
val coal = outer.dut
|
val coal = outer.dut
|
||||||
// FIXME: these need to be separate variables because of implicit naming in makeIOs
|
// FIXME: these need to be separate variables because of implicit naming in makeIOs
|
||||||
// there has to be a better way
|
// there has to be a better way
|
||||||
@@ -77,6 +73,8 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
|||||||
val coalIO2 = outer.cpuNodes(2).makeIOs()
|
val coalIO2 = outer.cpuNodes(2).makeIOs()
|
||||||
val coalIO3 = outer.cpuNodes(3).makeIOs()
|
val coalIO3 = outer.cpuNodes(3).makeIOs()
|
||||||
val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3)
|
val coalIOs = Seq(coalIO0, coalIO1, coalIO2, coalIO3)
|
||||||
|
|
||||||
|
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
@@ -89,11 +87,10 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
||||||
|
|
||||||
val coal = tb.dut
|
val coal = tb.dut
|
||||||
tb.cpuNodes.zip(tb.mitm).foreach { case (a, b) => b := a }
|
tb.cpuNodes.foreach(coal.node := _)
|
||||||
tb.mitm.foreach(coal.node := _)
|
|
||||||
tb.l2Nodes.foreach(_ := coal.node)
|
tb.l2Nodes.foreach(_ := coal.node)
|
||||||
|
|
||||||
test(tb.module) { c =>
|
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c =>
|
||||||
val nodes = c.coalIOs.map(_.head)
|
val nodes = c.coalIOs.map(_.head)
|
||||||
// val nodes = c.cpuNodesImp.map(_.out.head._1)
|
// val nodes = c.cpuNodesImp.map(_.out.head._1)
|
||||||
// val nodes = c.coal.node.in.map(_._1)
|
// val nodes = c.coal.node.in.map(_._1)
|
||||||
@@ -101,7 +98,7 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
|
|
||||||
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
|
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
|
||||||
val node = nodes(idx)
|
val node = nodes(idx)
|
||||||
// node.a.ready.expect(true.B) // TODO: this fails currently
|
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||||
node.a.bits.param.poke(0.U)
|
node.a.bits.param.poke(0.U)
|
||||||
node.a.bits.size.poke(size.U)
|
node.a.bits.size.poke(size.U)
|
||||||
@@ -119,10 +116,14 @@ class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// always ready to take coalesced requests
|
||||||
|
// c.coalMasterNode.head.a.ready.poke(true.B)
|
||||||
|
// c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
||||||
|
|
||||||
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
|
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
|
||||||
pokeA(nodes, idx=1, op=1, size=2, source=1, addr=0x14, mask=0xf, data=0x2222)
|
pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222)
|
||||||
pokeA(nodes, idx=2, op=1, size=2, source=2, addr=0x18, mask=0xf, data=0x3333)
|
pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333)
|
||||||
pokeA(nodes, idx=3, op=1, size=2, source=3, addr=0x1c, mask=0xf, data=0x4444)
|
pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444)
|
||||||
|
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
|
|
||||||
@@ -381,19 +382,20 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
|
|
||||||
object testConfig extends CoalescerConfig(
|
object testConfig extends CoalescerConfig(
|
||||||
MAX_SIZE = 5, // maximum coalesced size
|
maxSize = 5,
|
||||||
DEPTH = 2, // request window per lane
|
queueDepth = 2,
|
||||||
WAIT_TIMEOUT = 8, // max cycles to wait before forced fifo dequeue, per lane
|
waitTimeout = 8,
|
||||||
ADDR_WIDTH = 24, // assume <= 32
|
addressWidth = 24,
|
||||||
DATA_BUS_SIZE = 5, // 2^5=32 bytes, 256 bit bus
|
dataBusWidth = 5,
|
||||||
NUM_LANES = 4,
|
numLanes = 4,
|
||||||
// WATERMARK = 2, // minimum buffer occupancy to start coalescing
|
// watermark = 2,
|
||||||
WORD_SIZE = 4, // 32-bit system
|
wordSizeInBytes = 4,
|
||||||
WORD_WIDTH = 2, // log(WORD_SIZE)
|
wordWidth = 2,
|
||||||
NUM_OLD_IDS = 16, // num of outstanding requests per lane, from processor
|
numOldSrcIds = 16,
|
||||||
NUM_NEW_IDS = 4, // num of outstanding coalesced requests
|
numNewSrcIds = 4,
|
||||||
COAL_SIZES = Seq(4, 5),
|
respQueueDepth = 4,
|
||||||
SizeEnum = DefaultInFlightTableSizeEnum
|
coalSizes = Seq(4, 5),
|
||||||
|
sizeEnum = DefaultInFlightTableSizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
@@ -482,7 +484,7 @@ class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
numPerLaneReqs,
|
numPerLaneReqs,
|
||||||
sourceWidth,
|
sourceWidth,
|
||||||
offsetBits,
|
offsetBits,
|
||||||
testConfig.SizeEnum
|
testConfig.sizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
// it should "stop enqueueing when full" in {
|
// it should "stop enqueueing when full" in {
|
||||||
|
|||||||
Reference in New Issue
Block a user