Fix matchCount having bit width of 1 for spatial-only
This commit is contained in:
@@ -56,7 +56,7 @@ case class CoalescerConfig(
|
|||||||
)
|
)
|
||||||
|
|
||||||
object defaultConfig extends CoalescerConfig(
|
object defaultConfig extends CoalescerConfig(
|
||||||
numLanes = 32,
|
numLanes = 4,
|
||||||
// TODO: bigger size
|
// TODO: bigger size
|
||||||
maxSize = 3,
|
maxSize = 3,
|
||||||
queueDepth = 1,
|
queueDepth = 1,
|
||||||
@@ -258,50 +258,76 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
|
|
||||||
io := DontCare
|
io := DontCare
|
||||||
|
|
||||||
val size = coalSize
|
// Combinational logic to drive output from window contents.
|
||||||
val mask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
// The leader lanes only compare their heads against all entries of the
|
||||||
|
// follower lanes.
|
||||||
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
|
||||||
(req0.op === req1.op) &&
|
|
||||||
(req0v && req1v) &&
|
|
||||||
((req0.address & this.mask) === (req1.address & this.mask))
|
|
||||||
}
|
|
||||||
|
|
||||||
// combinational logic to drive output from window contents
|
|
||||||
val leaders = io.window.map(_.elts.head)
|
val leaders = io.window.map(_.elts.head)
|
||||||
val leadersValid = io.window.map(_.mask.asBools.head)
|
val leadersValid = io.window.map(_.mask.asBools.head)
|
||||||
|
|
||||||
// debug assertions and prints
|
// When doing spatial-only coalescing, queues should never drift from each
|
||||||
when (leadersValid.reduce(_ || _)) {
|
// other, i.e. the queue heads should always contain mem requests from the
|
||||||
|
// same instruction.
|
||||||
|
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
|
||||||
|
// requests with full source info even when the corresponding lane is not
|
||||||
|
// active.
|
||||||
|
def testNoQueueDrift = {
|
||||||
|
leaders.map((_, true.B))
|
||||||
|
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) =>
|
||||||
|
(h1, Mux(m0, (h0.source === h1.source), false.B))
|
||||||
|
}._2
|
||||||
|
}
|
||||||
|
def printQueueHeads = {
|
||||||
leaders.zipWithIndex.foreach{ case (head, i) =>
|
leaders.zipWithIndex.foreach{ case (head, i) =>
|
||||||
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
|
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
|
||||||
leadersValid(i), head.source, head.address)
|
leadersValid(i), head.source, head.address)
|
||||||
}
|
}
|
||||||
// when spatial-only coalescing, queue heads should never drift from each
|
}
|
||||||
// other
|
|
||||||
// FIXME: This relies on the MemTraceDriver's behavior of generating TL
|
// debug assertions and prints
|
||||||
// requests with full source info even when the corresponding lane is not
|
when (leadersValid.reduce(_ || _)) {
|
||||||
// active.
|
assert(testNoQueueDrift, "unexpected drift between lane request queues")
|
||||||
val leadersSourceMatch = leaders.map((_, true.B))
|
printQueueHeads
|
||||||
.reduce[(ReqQueueEntry, Bool)] { case ((h0, m0), (h1, _)) =>
|
}
|
||||||
(h1, Mux(m0, (h0.source === h1.source), false.B))
|
|
||||||
}._2
|
val size = coalSize
|
||||||
assert(leadersSourceMatch, "unexpected drift between lane request queues")
|
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
||||||
|
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
||||||
|
(req0.op === req1.op) &&
|
||||||
|
(req0v && req1v) &&
|
||||||
|
((req0.address & this.addrMask) === (req1.address & this.addrMask))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: match leader to only lanes >= leader idx
|
// TODO: match leader to only lanes >= leader idx
|
||||||
val matches = (leaders zip leadersValid).map { case (leader, leaderValid) =>
|
// Gives a 2-D table of Bools representing match at that entry, per lane.
|
||||||
io.window.map {followerLane =>
|
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
|
||||||
followerLane.elts.zip(followerLane.mask.asBools).map { case (follower, followerValid) =>
|
io.window.map { followerLane =>
|
||||||
this.canMatch(follower, followerValid, leader, leaderValid)
|
// compare leader's head against follower's every queue entry
|
||||||
|
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
|
||||||
|
canMatch(follower, followerValid, leader, leaderValid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: potentially expensive
|
// TODO: potentially expensive
|
||||||
val matchCounts = matches.map(leader => leader.map(PopCount(_)).reduce(_ + _))
|
val matchCounts = matchTablePerLane.map(table => table.map( PopCount(_) )
|
||||||
|
.reduce{ (m0, m1) =>
|
||||||
|
// this is clunky; what's a good way to extend a UInt's bit width?
|
||||||
|
val m0u = Wire(UInt(4.W))
|
||||||
|
val m1u = Wire(UInt(4.W))
|
||||||
|
m0u := m0
|
||||||
|
m1u := m1
|
||||||
|
m0u + m1u
|
||||||
|
})
|
||||||
|
// NOTE: be careful to not have matchCount result to be 1-bit wide
|
||||||
|
assert(matchCounts(0).getWidth > 0)
|
||||||
val canCoalesce = matchCounts.map(_ > 1.U)
|
val canCoalesce = matchCounts.map(_ > 1.U)
|
||||||
|
|
||||||
|
when (leadersValid.reduce(_ || _)) {
|
||||||
|
matchCounts.zipWithIndex.foreach { case (count, i) =>
|
||||||
|
printf(s"lane[${i}] matchCount = %d\n", count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: maybe use round robin arbiter instead of argmax to pick leader
|
// TODO: maybe use round robin arbiter instead of argmax to pick leader
|
||||||
val chosenLeaderIdx = matchCounts.zipWithIndex.map {
|
val chosenLeaderIdx = matchCounts.zipWithIndex.map {
|
||||||
case (a, b) => (a, b.U)
|
case (a, b) => (a, b.U)
|
||||||
@@ -310,7 +336,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
}._2
|
}._2
|
||||||
|
|
||||||
val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
|
val chosenLeader = VecInit(leaders)(chosenLeaderIdx)
|
||||||
val chosenMatches = VecInit(matches.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx)
|
val chosenMatches = VecInit(matchTablePerLane.map(leader => VecInit(leader.map(VecInit(_).asUInt))))(chosenLeaderIdx)
|
||||||
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
||||||
|
|
||||||
// coverage calculation
|
// coverage calculation
|
||||||
@@ -321,7 +347,7 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
}
|
}
|
||||||
|
|
||||||
io.results.leaderIdx := chosenLeaderIdx
|
io.results.leaderIdx := chosenLeaderIdx
|
||||||
io.results.baseAddr := chosenLeader.address & mask
|
io.results.baseAddr := chosenLeader.address & addrMask
|
||||||
io.results.matchOH := chosenMatches
|
io.results.matchOH := chosenMatches
|
||||||
io.results.matchCount := chosenMatchCount
|
io.results.matchCount := chosenMatchCount
|
||||||
io.results.coverageHits := PopCount(hits)
|
io.results.coverageHits := PopCount(hits)
|
||||||
@@ -424,8 +450,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
||||||
|
|
||||||
// uncomment the following lines to disable coalescing entirely
|
// uncomment the following lines to disable coalescing entirely
|
||||||
io.outReq.valid := false.B
|
// io.outReq.valid := false.B
|
||||||
io.invalidate.valid := false.B
|
// io.invalidate.valid := false.B
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
|
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) {
|
||||||
@@ -1402,10 +1428,12 @@ object TracePrintf {
|
|||||||
|
|
||||||
// tracedriver --> coalescer --> tracelogger --> tlram
|
// tracedriver --> coalescer --> tracelogger --> tlram
|
||||||
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
|
class TLRAMCoalescerLogger(implicit p: Parameters) extends LazyModule {
|
||||||
// TODO: use parameters for numLanes
|
|
||||||
val numLanes = 4
|
|
||||||
// val filename = "test.trace"
|
// val filename = "test.trace"
|
||||||
val filename = "vecadd.core1.thread4.trace"
|
val filename = "vecadd.core1.thread4.trace"
|
||||||
|
// val filename = "nvbit.vecadd.n100000.filter_sm0.trace"
|
||||||
|
// TODO: use parameters for numLanes
|
||||||
|
val numLanes = defaultConfig.numLanes
|
||||||
|
|
||||||
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
|
val driver = LazyModule(new MemTraceDriver(defaultConfig, filename))
|
||||||
val coreSideLogger = LazyModule(
|
val coreSideLogger = LazyModule(
|
||||||
new MemTraceLogger(numLanes, filename, loggerName = "coreside")
|
new MemTraceLogger(numLanes, filename, loggerName = "coreside")
|
||||||
|
|||||||
Reference in New Issue
Block a user