Fix matchCount having bit width of 1 for spatial-only

This commit is contained in:
Hansung Kim
2023-04-25 18:46:26 -07:00
parent 2090b8703e
commit ef25c8a3f0

View File

@@ -56,7 +56,7 @@ case class CoalescerConfig(
) )
object defaultConfig extends CoalescerConfig( object defaultConfig extends CoalescerConfig(
numLanes = 32, numLanes = 4,
// TODO: bigger size // TODO: bigger size
maxSize = 3, maxSize = 3,
queueDepth = 1, queueDepth = 1,
@@ -258,50 +258,76 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
io := DontCare io := DontCare
val size = coalSize // Combinational logic to drive output from window contents.
val mask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U // The leader lanes only compare their heads against all entries of the
// follower lanes.
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
  // Two requests match when both are valid, carry the same op, and their
  // addresses agree once `mask` has been applied to each of them.
  val bothValid = req0v && req1v
  val sameOp = req0.op === req1.op
  val sameMaskedAddr = (req0.address & this.mask) === (req1.address & this.mask)
  sameOp && bothValid && sameMaskedAddr
}
// combinational logic to drive output from window contents
val leaders = io.window.map(_.elts.head) val leaders = io.window.map(_.elts.head)
val leadersValid = io.window.map(_.mask.asBools.head) val leadersValid = io.window.map(_.mask.asBools.head)
// debug assertions and prints // When doing spatial-only coalescing, queues should never drift from each
when (leadersValid.reduce(_ || _)) { // other, i.e. the queue heads should always contain mem requests from the
// same instruction.
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
// requests with full source info even when the corresponding lane is not
// active.
def testNoQueueDrift = {
  // Compare the `source` of each queue head with that of the next lane's
  // head; the result is true only when every adjacent pair agrees, i.e.
  // all heads carry the same source.
  val adjacentSourcesEqual = leaders.zip(leaders.tail).map { case (prev, next) =>
    prev.source === next.source
  }
  adjacentSourcesEqual.foldLeft(true.B)(_ && _)
}
def printQueueHeads = {
leaders.zipWithIndex.foreach{ case (head, i) => leaders.zipWithIndex.foreach{ case (head, i) =>
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
leadersValid(i), head.source, head.address) leadersValid(i), head.source, head.address)
} }
// when spatial-only coalescing, queue heads should never drift from each }
// other
// FIXME: This relies on the MemTraceDriver's behavior of generating TL // debug assertions and prints
// requests with full source info even when the corresponding lane is not when (leadersValid.reduce(_ || _)) {
// active. assert(testNoQueueDrift, "unexpected drift between lane request queues")
val leadersSourceMatch = leaders.map((_, true.B)) printQueueHeads
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) => }
(h1, Mux(m0, (h0.source === h1.source), false.B))
}._2 val size = coalSize
assert(leadersSourceMatch, "unexpected drift between lane request queues") val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
(req0.op === req1.op) &&
(req0v && req1v) &&
((req0.address & this.addrMask) === (req1.address & this.addrMask))
} }
// TODO: match leader to only lanes >= leader idx // TODO: match leader to only lanes >= leader idx
val matches = (leaders zip leadersValid).map { case (leader, leaderValid) => // Gives a 2-D table of Bools representing match at that entry, per lane.
io.window.map {followerLane => val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
followerLane.elts.zip(followerLane.mask.asBools).map { case (follower, followerValid) => io.window.map { followerLane =>
this.canMatch(follower, followerValid, leader, leaderValid) // compare leader's head against follower's every queue entry
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
canMatch(follower, followerValid, leader, leaderValid)
} }
} }
} }
// TODO: potentially expensive // TODO: potentially expensive
val matchCounts = matches.map(leader => leader.map(PopCount(_)).reduce(_ + _)) val matchCounts = matchTablePerLane.map(table => table.map( PopCount(_) )
.reduce{ (m0, m1) =>
// this is clunky; what's a good way to extend a UInt's bit width?
val m0u = Wire(UInt(4.W))
val m1u = Wire(UInt(4.W))
m0u := m0
m1u := m1
m0u + m1u
})
// NOTE: matchCount must be wider than 1 bit: canCoalesce compares it against
// 1.U with `>`, which a 1-bit value can never satisfy. Check this at
// elaboration time; `getWidth > 0` would still accept a 1-bit result.
assert(matchCounts(0).getWidth > 1, "matchCount must be more than 1 bit wide")
val canCoalesce = matchCounts.map(_ > 1.U) val canCoalesce = matchCounts.map(_ > 1.U)
// Debug trace: whenever at least one lane has a valid head, print the match
// count computed for every lane.
when (leadersValid.reduce(_ || _)) {
  for ((count, i) <- matchCounts.zipWithIndex) {
    printf(s"lane[${i}] matchCount = %d\n", count)
  }
}
// TODO: maybe use round robin arbiter instead of argmax to pick leader // TODO: maybe use round robin arbiter instead of argmax to pick leader
val chosenLeaderIdx = matchCounts.zipWithIndex.map { val chosenLeaderIdx = matchCounts.zipWithIndex.map {
case (a, b) => (a, b.U) case (a, b) => (a, b.U)
@@ -310,7 +336,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
}._2 }._2
val chosenLeader = VecInit(leaders)(chosenLeaderIdx) val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
val chosenMatches = VecInit(matches.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx) val chosenMatches = VecInit(matchTablePerLane.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx)
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx) val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
// coverage calculation // coverage calculation
@@ -321,7 +347,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
} }
io.results.leaderIdx := chosenLeaderIdx io.results.leaderIdx := chosenLeaderIdx
io.results.baseAddr := chosenLeader.address & mask io.results.baseAddr := chosenLeader.address & addrMask
io.results.matchOH := chosenMatches io.results.matchOH := chosenMatches
io.results.matchCount := chosenMatchCount io.results.matchCount := chosenMatchCount
io.results.coverageHits := PopCount(hits) io.results.coverageHits := PopCount(hits)
@@ -424,8 +450,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
io.invalidate.valid := io.outReq.fire // invalidate only when fire io.invalidate.valid := io.outReq.fire // invalidate only when fire
// uncomment the following lines to disable coalescing entirely // uncomment the following lines to disable coalescing entirely
io.outReq.valid := false.B // io.outReq.valid := false.B
io.invalidate.valid := false.B // io.invalidate.valid := false.B
} }
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) { class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
@@ -1402,10 +1428,12 @@ object TracePrintf {
// tracedriver --> coalescer --> tracelogger --> tlram // tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule { class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
// TODO: use parameters for numLanes
val numLanes = 4
// val filename = "test.trace" // val filename = "test.trace"
val filename = "vecadd.core1.thread4.trace" val filename = "vecadd.core1.thread4.trace"
// val filename = "nvbit.vecadd.n100000.filter_sm0.trace"
// TODO: use parameters for numLanes
val numLanes = defaultConfig.numLanes
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename)) val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
val coreSideLogger = LazyModule( val coreSideLogger = LazyModule(
new MemTraceLogger(numLanes, filename, loggerName = "coreside") new MemTraceLogger(numLanes, filename, loggerName = "coreside")