Fix matchCount having bit width of 1 for spatial-only

This commit is contained in:
Hansung Kim
2023-04-25 18:46:26 -07:00
parent 2090b8703e
commit ef25c8a3f0

View File

@@ -56,7 +56,7 @@ case class CoalescerConfig(
)
object defaultConfig extends CoalescerConfig(
numLanes = 32,
numLanes = 4,
// TODO: bigger size
maxSize = 3,
queueDepth = 1,
@@ -258,50 +258,76 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
io := DontCare
val size = coalSize
val mask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
(req0.op === req1.op) &&
(req0v && req1v) &&
((req0.address & this.mask) === (req1.address & this.mask))
}
// combinational logic to drive output from window contents
// Combinational logic to drive output from window contents.
// The leader lanes only compare their heads against all entries of the
// follower lanes.
val leaders = io.window.map(_.elts.head)
val leadersValid = io.window.map(_.mask.asBools.head)
// debug assertions and prints
when (leadersValid.reduce(_ || _)) {
// When doing spatial-only coalescing, queues should never drift from each
// other, i.e. the queue heads should always contain mem requests from the
// same instruction.
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
// requests with full source info even when the corresponding lane is not
// active.
def testNoQueueDrift = {
leaders.map((_, true.B))
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) =>
(h1, Mux(m0, (h0.source === h1.source), false.B))
}._2
}
def printQueueHeads = {
leaders.zipWithIndex.foreach{ case (head, i) =>
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
leadersValid(i), head.source, head.address)
}
// when spatial-only coalescing, queue heads should never drift from each
// other
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
// requests with full source info even when the corresponding lane is not
// active.
val leadersSourceMatch = leaders.map((_, true.B))
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) =>
(h1, Mux(m0, (h0.source === h1.source), false.B))
}._2
assert(leadersSourceMatch, "unexpected drift between lane request queues")
}
// debug assertions and prints
when (leadersValid.reduce(_ || _)) {
assert(testNoQueueDrift, "unexpected drift between lane request queues")
printQueueHeads
}
val size = coalSize
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
(req0.op === req1.op) &&
(req0v && req1v) &&
((req0.address & this.addrMask) === (req1.address & this.addrMask))
}
// TODO: match leader to only lanes >= leader idx
val matches = (leaders zip leadersValid).map { case (leader, leaderValid) =>
io.window.map {followerLane =>
followerLane.elts.zip(followerLane.mask.asBools).map { case (follower, followerValid) =>
this.canMatch(follower, followerValid, leader, leaderValid)
// Gives a 2-D table of Bools representing match at that entry, per lane.
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
io.window.map { followerLane =>
// compare leader's head against follower's every queue entry
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
canMatch(follower, followerValid, leader, leaderValid)
}
}
}
// TODO: potentially expensive
val matchCounts = matches.map(leader => leader.map(PopCount(_)).reduce(_ + _))
val matchCounts = matchTablePerLane.map(table => table.map( PopCount(_) )
.reduce{ (m0, m1) =>
// this is clunky; what's a good way to extend a UInt's bit width?
val m0u = Wire(UInt(4.W))
val m1u = Wire(UInt(4.W))
m0u := m0
m1u := m1
m0u + m1u
})
// NOTE: be careful to not have matchCount result to be 1-bit wide
assert(matchCounts(0).getWidth > 0)
val canCoalesce = matchCounts.map(_ > 1.U)
when (leadersValid.reduce(_ || _)) {
matchCounts.zipWithIndex.foreach { case (count, i) =>
printf(s"lane[${i}] matchCount = %d\n", count);
}
}
// TODO: maybe use round robin arbiter instead of argmax to pick leader
val chosenLeaderIdx = matchCounts.zipWithIndex.map {
case (a, b) => (a, b.U)
@@ -310,7 +336,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
}._2
val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
val chosenMatches = VecInit(matches.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx)
val chosenMatches = VecInit(matchTablePerLane.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx)
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
// coverage calculation
@@ -321,7 +347,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
}
io.results.leaderIdx := chosenLeaderIdx
io.results.baseAddr := chosenLeader.address & mask
io.results.baseAddr := chosenLeader.address & addrMask
io.results.matchOH := chosenMatches
io.results.matchCount := chosenMatchCount
io.results.coverageHits := PopCount(hits)
@@ -424,8 +450,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
io.invalidate.valid := io.outReq.fire // invalidate only when fire
// uncomment the following lines to disable coalescing entirely
io.outReq.valid := false.B
io.invalidate.valid := false.B
// io.outReq.valid := false.B
// io.invalidate.valid := false.B
}
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
@@ -1402,10 +1428,12 @@ object TracePrintf {
// tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
// TODO: use parameters for numLanes
val numLanes = 4
// val filename = "test.trace"
val filename = "vecadd.core1.thread4.trace"
// val filename = "nvbit.vecadd.n100000.filter_sm0.trace"
// TODO: use parameters for numLanes
val numLanes = defaultConfig.numLanes
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
val coreSideLogger = LazyModule(
new MemTraceLogger(numLanes, filename, loggerName = "coreside")