This commit is contained in:
Hansung Kim
2023-05-11 16:11:39 -07:00
parent 7fa6be4a8b
commit 0c8909cb43

View File

@@ -152,7 +152,7 @@ class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: In
fromSource = this.source, fromSource = this.source,
toAddress = this.address, toAddress = this.address,
lgSize = this.size, lgSize = this.size,
data = this.data, data = this.data
) )
val (glegal, gbits) = edgeOut.Get( val (glegal, gbits) = edgeOut.Get(
fromSource = this.source, fromSource = this.source,
@@ -166,17 +166,22 @@ class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: In
} }
} }
case class NonCoalescedRequest(config: CoalescerConfig) case class NonCoalescedRequest(config: CoalescerConfig)
extends Request(sourceWidth = log2Ceil(config.numOldSrcIds), extends Request(
sizeWidth = config.wordSizeWidth, sourceWidth = log2Ceil(config.numOldSrcIds),
addressWidth = config.addressWidth, sizeWidth = config.wordSizeWidth,
dataWidth = config.wordSizeInBytes * 8) addressWidth = config.addressWidth,
dataWidth = config.wordSizeInBytes * 8
)
case class CoalescedRequest(config: CoalescerConfig) case class CoalescedRequest(config: CoalescerConfig)
extends Request(sourceWidth = log2Ceil(config.numNewSrcIds), extends Request(
sizeWidth = log2Ceil(config.maxCoalLogSize), sourceWidth = log2Ceil(config.numNewSrcIds),
addressWidth = config.addressWidth, sizeWidth = log2Ceil(config.maxCoalLogSize),
dataWidth = (8 * (1 << config.maxCoalLogSize))) addressWidth = config.addressWidth,
dataWidth = (8 * (1 << config.maxCoalLogSize))
)
class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int)
extends Bundle {
val op = UInt(1.W) // 0=READ 1=WRITE val op = UInt(1.W) // 0=READ 1=WRITE
val size = UInt(sizeWidth.W) val size = UInt(sizeWidth.W)
val source = UInt(sourceWidth.W) val source = UInt(sourceWidth.W)
@@ -205,17 +210,22 @@ class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle
} }
} }
case class NonCoalescedResponse(config: CoalescerConfig) case class NonCoalescedResponse(config: CoalescerConfig)
extends Response(sourceWidth = log2Ceil(config.numOldSrcIds), extends Response(
sizeWidth = config.wordSizeWidth, sourceWidth = log2Ceil(config.numOldSrcIds),
dataWidth = config.wordSizeInBytes * 8) sizeWidth = config.wordSizeWidth,
dataWidth = config.wordSizeInBytes * 8
)
case class CoalescedResponse(config: CoalescerConfig) case class CoalescedResponse(config: CoalescerConfig)
extends Response(sourceWidth = log2Ceil(config.numNewSrcIds), extends Response(
sizeWidth = log2Ceil(config.maxCoalLogSize), sourceWidth = log2Ceil(config.numNewSrcIds),
dataWidth = (8 * (1 << config.maxCoalLogSize))) sizeWidth = log2Ceil(config.maxCoalLogSize),
dataWidth = (8 * (1 << config.maxCoalLogSize))
)
// If `ignoreInUse`, just keep giving out new IDs without checking if it is in // If `ignoreInUse`, just keep giving out new IDs without checking if it is in
// use. // use.
class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module { class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true)
extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val gen = Input(Bool()) val gen = Input(Bool())
val reclaim = Input(Valid(UInt(sourceWidth.W))) val reclaim = Input(Valid(UInt(sourceWidth.W)))
@@ -234,15 +244,16 @@ class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) e
io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid) io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid)
io.id.bits := head io.id.bits := head
when (io.gen && io.id.valid /* fire */) { when(io.gen && io.id.valid /* fire */ ) {
occupancyTable(io.id.bits).valid := true.B // mark in use occupancyTable(io.id.bits).valid := true.B // mark in use
} }
when (io.reclaim.valid) { when(io.reclaim.valid) {
occupancyTable(io.reclaim.bits).valid := false.B // mark freed occupancyTable(io.reclaim.bits).valid := false.B // mark freed
} }
} }
class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module { class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val queue = new Bundle { val queue = new Bundle {
val enq = Vec(config.numLanes, DeqIO(gen.cloneType)) val enq = Vec(config.numLanes, DeqIO(gen.cloneType))
@@ -259,7 +270,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// eltPrototype.valid := false.B // eltPrototype.valid := false.B
val elts = Reg(Vec(config.numLanes, Vec(entries, Valid(gen)))) val elts = Reg(Vec(config.numLanes, Vec(entries, Valid(gen))))
val writePtr = RegInit(VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W)))) val writePtr = RegInit(
VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W)))
)
val deqDone = RegInit(VecInit(Seq.fill(config.numLanes)(false.B))) val deqDone = RegInit(VecInit(Seq.fill(config.numLanes)(false.B)))
private def resetElts = { private def resetElts = {
@@ -270,7 +283,7 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
} }
} }
} }
when (reset.asBool) { when(reset.asBool) {
resetElts resetElts
} }
@@ -286,14 +299,17 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// current cycle. // current cycle.
// //
// shift hint is asserted when no queue head has a coalescable request left this or next cycle // shift hint is asserted when no queue head has a coalescable request left this or next cycle
val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, inv) => val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0)))
c && !(io.invalidate.valid && inv) .map { case (c, inv) =>
}.reduce(_ || _) c && !(io.invalidate.valid && inv)
}
.reduce(_ || _)
val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _) val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _)
// valid && !fire means we enable enqueueing to a full queue, provided the // valid && !fire means we enable enqueueing to a full queue, provided the
// arbiter is taking away all remaining valid queue heads in the next cycle so // arbiter is taking away all remaining valid queue heads in the next cycle so
// that we make space for the entire next warp. // that we make space for the entire next warp.
val syncedDeqValidNextCycle = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) val syncedDeqValidNextCycle =
io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _)
for (i <- 0 until config.numLanes) { for (i <- 0 until config.numLanes) {
val enq = io.queue.enq(i) val enq = io.queue.enq(i)
@@ -313,20 +329,22 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// can take new entries if not full, or if full but shifting // can take new entries if not full, or if full but shifting
enq.ready := (!ctrl.full) || ctrl.shift enq.ready := (!ctrl.full) || ctrl.shift
when (ctrl.shift) { when(ctrl.shift) {
// shift, invalidate tail, invalidate coalesced requests // shift, invalidate tail, invalidate coalesced requests
elts(i).zipWithIndex.foreach { case (elt, j) => elts(i).zipWithIndex.foreach { case (elt, j) =>
if (j == entries - 1) { // tail if (j == entries - 1) { // tail
elt.valid := false.B elt.valid := false.B
} else { } else {
elt.bits := elts(i)(j + 1).bits elt.bits := elts(i)(j + 1).bits
elt.valid := elts(i)(j + 1).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) elt.valid := elts(i)(
j + 1
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1))
} }
} }
// reset dequeue mask when new entries are shifted in // reset dequeue mask when new entries are shifted in
deqDone(i) := false.B deqDone(i) := false.B
// enqueue // enqueue
when (enq.ready && syncedEnqValid) { // to allow drift, swap for enq.fire when(enq.ready && syncedEnqValid) { // to allow drift, swap for enq.fire
elts(i)(writePtr(i) - 1.U).bits := enq.bits elts(i)(writePtr(i) - 1.U).bits := enq.bits
elts(i)(writePtr(i) - 1.U).valid := enq.valid elts(i)(writePtr(i) - 1.U).valid := enq.valid
}.otherwise { }.otherwise {
@@ -334,13 +352,13 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
} }
}.otherwise { }.otherwise {
// invalidate coalesced requests // invalidate coalesced requests
when (io.invalidate.valid) { when(io.invalidate.valid) {
(elts(i) zip io.invalidate.bits(i).asBools).map { case (elt, inv) => (elts(i) zip io.invalidate.bits(i).asBools).map { case (elt, inv) =>
elt.valid := elt.valid && !inv elt.valid := elt.valid && !inv
} }
} }
// enqueue // enqueue
when (enq.ready && syncedEnqValid) { when(enq.ready && syncedEnqValid) {
elts(i)(writePtr(i)).bits := enq.bits elts(i)(writePtr(i)).bits := enq.bits
elts(i)(writePtr(i)).valid := enq.valid elts(i)(writePtr(i)).valid := enq.valid
writePtr(i) := writePtr(i) + 1.U writePtr(i) := writePtr(i) + 1.U
@@ -352,8 +370,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// When doing spatial-only coalescing, queues should never drift from each // When doing spatial-only coalescing, queues should never drift from each
// other, i.e. the queue heads should always contain mem requests from the // other, i.e. the queue heads should always contain mem requests from the
// same instruction. // same instruction.
val queueInSync = controlSignals.map(_ === controlSignals.head).reduce(_ && _) && val queueInSync =
writePtr.map(_ === writePtr.head).reduce(_ && _) controlSignals.map(_ === controlSignals.head).reduce(_ && _) &&
writePtr.map(_ === writePtr.head).reduce(_ && _)
assert(queueInSync, "shift queue lanes are not in sync") assert(queueInSync, "shift queue lanes are not in sync")
io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt) io.mask := elts.map(x => VecInit(x.map(_.valid)).asUInt)
@@ -361,8 +380,11 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
} }
// Software model: coalescer.py // Software model: coalescer.py
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedRequest], class MonoCoalescer(
config: CoalescerConfig) extends Module { coalLogSize: Int,
windowT: CoalShiftQueue[NonCoalescedRequest],
config: CoalescerConfig
) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val window = Input(windowT.io.cloneType) val window = Input(windowT.io.cloneType)
val results = Output(new Bundle { val results = Output(new Bundle {
@@ -371,8 +393,10 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W))) val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
// number of entries matched with this leader lane's head. // number of entries matched with this leader lane's head.
// maximum is numLanes * queueDepth // maximum is numLanes * queueDepth
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) val matchCount =
val coverageHits = Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W))
val coverageHits =
Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W))
val canCoalesce = Output(Vec(config.numLanes, Bool())) val canCoalesce = Output(Vec(config.numLanes, Bool()))
}) })
}) })
@@ -386,9 +410,13 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
val leadersValid = io.window.mask.map(_.asBools.head) val leadersValid = io.window.mask.map(_.asBools.head)
def printQueueHeads = { def printQueueHeads = {
leaders.zipWithIndex.foreach{ case (head, i) => leaders.zipWithIndex.foreach { case (head, i) =>
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", printf(
leadersValid(i), head.source, head.address) s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
leadersValid(i),
head.source,
head.address
)
} }
} }
// when (leadersValid.reduce(_ || _)) { // when (leadersValid.reduce(_ || _)) {
@@ -406,34 +434,42 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
// Gives a 2-D table of Bools representing match at every queue entry, // Gives a 2-D table of Bools representing match at every queue entry,
// for each lane (so 3-D in total). // for each lane (so 3-D in total).
// dimensions: (leader lane, follower lane, follower entry) // dimensions: (leader lane, follower lane, follower entry)
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) => val matchTablePerLane = (leaders zip leadersValid).map {
(io.window.elts zip io.window.mask).map { case (followers, followerValids) => case (leader, leaderValid) =>
// compare leader's head against follower's every queue entry (io.window.elts zip io.window.mask).map {
(followers zip followerValids.asBools).map { case (follower, followerValid) => case (followers, followerValids) =>
canMatch(follower, followerValid, leader, leaderValid) // compare leader's head against follower's every queue entry
// FIXME: disabling halving optimization because it does not give the (followers zip followerValids.asBools).map {
// correct per-lane coalescable indication to the shift queue case (follower, followerValid) =>
// // match leader to only followers at lanes >= leader idx canMatch(follower, followerValid, leader, leaderValid)
// // this halves the number of comparators // FIXME: disabling halving optimization because it does not give the
// if (followerIndex < leaderIndex) false.B // correct per-lane coalescable indication to the shift queue
// else canMatch(follower, followerValid, leader, leaderValid) // // match leader to only followers at lanes >= leader idx
// // this halves the number of comparators
// if (followerIndex < leaderIndex) false.B
// else canMatch(follower, followerValid, leader, leaderValid)
}
} }
}
} }
val matchCounts = matchTablePerLane.map(table => val matchCounts = matchTablePerLane.map(table =>
table.map(PopCount(_)) // sum up each column table
.reduce(_ +& _)) .map(PopCount(_)) // sum up each column
.reduce(_ +& _)
)
val canCoalesce = matchCounts.map(_ > 1.U) val canCoalesce = matchCounts.map(_ > 1.U)
// Elect the leader that has the most match counts. // Elect the leader that has the most match counts.
// TODO: potentially expensive: magnitude comparator // TODO: potentially expensive: magnitude comparator
def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = { def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = {
matchCounts.zipWithIndex.map { matchCounts.zipWithIndex
case (c, i) => (c, i.U) .map { case (c, i) =>
}.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => (c, i.U)
}
.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) =>
(Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j)) (Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j))
}._2 }
._2
} }
// Elect leader by choosing the smallest-index lane that has a valid // Elect leader by choosing the smallest-index lane that has a valid
// match, i.e. using priority encoder. // match, i.e. using priority encoder.
@@ -444,7 +480,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux
// matchTable for the chosen lane, but converted to a Vec[UInt] // matchTable for the chosen lane, but converted to a Vec[UInt]
val chosenMatches = VecInit(matchTablePerLane.map{ table => val chosenMatches = VecInit(matchTablePerLane.map { table =>
VecInit(table.map(VecInit(_).asUInt)) VecInit(table.map(VecInit(_).asUInt))
})(chosenLeaderIdx) })(chosenLeaderIdx)
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx) val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
@@ -452,18 +488,21 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
// coverage calculation // coverage calculation
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth) def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth)
// 2-D table flattened to 1-D // 2-D table flattened to 1-D
val offsets = io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) val offsets =
io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address)))
val valids = chosenMatches.flatMap(_.asBools) val valids = chosenMatches.flatMap(_.asBools)
// indicates for each word in the coalesced chunk whether it is accessed by // indicates for each word in the coalesced chunk whether it is accessed by
// any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four // any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four
// words in the coalesced data coming back will be accessed by some request // words in the coalesced data coming back will be accessed by some request
// and we've reached 100% bandwidth utilization. // and we've reached 100% bandwidth utilization.
val hits = Seq.tabulate(1 << (size - config.wordSizeWidth)) { target => val hits = Seq.tabulate(1 << (size - config.wordSizeWidth)) { target =>
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _) (offsets zip valids)
.map { case (offset, valid) => valid && (offset === target.U) }
.reduce(_ || _)
} }
// debug prints // debug prints
when (leadersValid.reduce(_ || _)) { when(leadersValid.reduce(_ || _)) {
matchCounts.zipWithIndex.foreach { case (count, i) => matchCounts.zipWithIndex.foreach { case (count, i) =>
printf(s"lane[${i}] matchCount = %d\n", count); printf(s"lane[${i}] matchCount = %d\n", count);
} }
@@ -492,20 +531,26 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[NonCoalescedReques
// coalesced request out of all possible combinations. // coalesced request out of all possible combinations.
// //
// Software model: coalescer.py // Software model: coalescer.py
class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Request, class MultiCoalescer(
config: CoalescerConfig) extends Module { windowT: CoalShiftQueue[NonCoalescedRequest],
coalReqT: Request,
config: CoalescerConfig
) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
// coalescing window, connected to the contents of the request queues // coalescing window, connected to the contents of the request queues
val window = Input(windowT.io.cloneType) val window = Input(windowT.io.cloneType)
// generated coalesced request // generated coalesced request
val coalReq = DecoupledIO(coalReqT.cloneType) val coalReq = DecoupledIO(coalReqT.cloneType)
// invalidate signals going into each request queue's head // invalidate signals going into each request queue's head
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) val invalidate =
Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
// whether a lane is coalescable // whether a lane is coalescable
val coalescable = Output(Vec(config.numLanes, Bool())) val coalescable = Output(Vec(config.numLanes, Bool()))
}) })
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) val coalescers = config.coalLogSizes.map(size =>
Module(new MonoCoalescer(size, windowT, config))
)
coalescers.foreach(_.io.window := io.window) coalescers.foreach(_.io.window := io.window)
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = { def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
@@ -530,9 +575,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W)) val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
val chosenValid = Wire(Bool()) val chosenValid = Wire(Bool())
// minimum 25% coverage // minimum 25% coverage
val minCoverage = 1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2)) val minCoverage =
1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2))
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { when(normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
chosenSizeIdx := argMax(normalizedHits) chosenSizeIdx := argMax(normalizedHits)
chosenValid := true.B chosenValid := true.B
printf("coalescing success by coverage policy\n") printf("coalescing success by coverage policy\n")
@@ -562,9 +608,14 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools) val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
// check for word alignment in addresses // check for word alignment in addresses
assert(io.window.elts.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)).zip( assert(
io.window.mask.flatMap(_.asBools)).map { case (aligned, valid) => (!valid) || aligned }.reduce(_ || _), io.window.elts
"one or more addresses used for coalescing is not word-aligned") .flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U))
.zip(io.window.mask.flatMap(_.asBools))
.map { case (aligned, valid) => (!valid) || aligned }
.reduce(_ || _),
"one or more addresses used for coalescing is not word-aligned"
)
// note: this is word-level coalescing. if finer granularity is needed, need to modify code // note: this is word-level coalescing. if finer granularity is needed, need to modify code
val numWords = (1.U << (chosenSize - config.wordSizeWidth.U)).asUInt val numWords = (1.U << (chosenSize - config.wordSizeWidth.U)).asUInt
@@ -579,18 +630,29 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req
val sel = flatReqs.zip(flatMatches).map { case (req, m) => val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
// note: ANDing against addrMask is to conform to active byte lanes requirements // note: ANDing against addrMask is to conform to active byte lanes requirements
// if aligning to LSB suffices, we should add the bitwise AND back // if aligning to LSB suffices, we should add the bitwise AND back
m && ((req.address(config.maxCoalLogSize - 1, config.wordSizeWidth)/* & addrMask*/) === i.U) m && ((req.address(
config.maxCoalLogSize - 1,
config.wordSizeWidth
) /* & addrMask*/ ) === i.U)
} }
// TODO: SW uses priority encoder, not sure about behavior of MuxCase // TODO: SW uses priority encoder, not sure about behavior of MuxCase
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) => data(i) := MuxCase(
s -> req.data DontCare,
}) flatReqs.zip(sel).map { case (req, s) =>
mask(i) := MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) => s -> req.data
s -> req.mask }
}) )
mask(i) := MuxCase(
0.U,
flatReqs.zip(sel).map { case (req, s) =>
s -> req.mask
}
)
} }
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))) val sourceGen = Module(
new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))
)
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
sourceGen.io.reclaim.valid := false.B // not used sourceGen.io.reclaim.valid := false.B // not used
sourceGen.io.reclaim.bits := DontCare // not used sourceGen.io.reclaim.bits := DontCare // not used
@@ -608,7 +670,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req
io.invalidate.bits := chosenBundle.matchOH io.invalidate.bits := chosenBundle.matchOH
io.invalidate.valid := io.coalReq.fire // invalidate only when fire io.invalidate.valid := io.coalReq.fire // invalidate only when fire
io.coalescable := coalescers.map(_.io.results.canCoalesce.asUInt).reduce(_ | _).asBools io.coalescable := coalescers
.map(_.io.results.canCoalesce.asUInt)
.reduce(_ | _)
.asBools
dontTouch(io.invalidate) // debug dontTouch(io.invalidate) // debug
@@ -620,21 +685,30 @@ class MultiCoalescer(windowT: CoalShiftQueue[NonCoalescedRequest], coalReqT: Req
if (!config.enable) disable if (!config.enable) disable
} }
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) { class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
require(outer.cpuNode.in.length == config.numLanes, extends LazyModuleImp(outer) {
require(
outer.cpuNode.in.length == config.numLanes,
s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as " + s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as " +
s"config.numLanes (${config.numLanes})") s"config.numLanes (${config.numLanes})"
require(outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds), )
require(
outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
s"TL param sourceBits (${outer.cpuNode.in.head._1.params.sourceBits}) " + s"TL param sourceBits (${outer.cpuNode.in.head._1.params.sourceBits}) " +
s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})") s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})"
require(outer.cpuNode.in.head._1.params.addressBits == config.addressWidth, )
require(
outer.cpuNode.in.head._1.params.addressBits == config.addressWidth,
s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " +
s"mismatch with config.addressWidth (${config.addressWidth})") s"mismatch with config.addressWidth (${config.addressWidth})"
)
val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits
// note we are using word size. assuming all coalescer inputs are word sized // note we are using word size. assuming all coalescer inputs are word sized
val reqQueueEntryT = new NonCoalescedRequest(config) val reqQueueEntryT = new NonCoalescedRequest(config)
val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)) val reqQueues = Module(
new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)
)
val coalReqT = new CoalescedRequest(config) val coalReqT = new CoalescedRequest(config)
val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config)) val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config))
@@ -710,7 +784,6 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
tlCoal.e.valid := false.B tlCoal.e.valid := false.B
// =========================================================================== // ===========================================================================
// Response flow // Response flow
// =========================================================================== // ===========================================================================
@@ -723,8 +796,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
val numPerLaneReqs = config.queueDepth val numPerLaneReqs = config.queueDepth
// FIXME: no need to contain maxCoalLogSize data // FIXME: no need to contain maxCoalLogSize data
val respQueueEntryT = new Response(oldSourceWidth, log2Ceil(config.maxCoalLogSize), val respQueueEntryT = new Response(
(1 << config.maxCoalLogSize) * 8) oldSourceWidth,
log2Ceil(config.maxCoalLogSize),
(1 << config.maxCoalLogSize) * 8
)
val respQueues = Seq.tabulate(config.numLanes) { _ => val respQueues = Seq.tabulate(config.numLanes) { _ =>
Module( Module(
new MultiPortQueue( new MultiPortQueue(
@@ -810,12 +886,14 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
val newEntry = Wire(uncoalescer.inflightTable.entryT) val newEntry = Wire(uncoalescer.inflightTable.entryT)
newEntry.source := coalescer.io.coalReq.bits.source newEntry.source := coalescer.io.coalReq.bits.source
assert (config.maxCoalLogSize <= config.dataBusWidth, assert(
"multi-beat coalesced reads/writes are currently not supported") config.maxCoalLogSize <= config.dataBusWidth,
assert ( "multi-beat coalesced reads/writes are currently not supported"
)
assert(
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
+ s" (${(1 << config.dataBusWidth) * 8})" + s" (${(1 << config.dataBusWidth) * 8})"
) )
val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits)
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
@@ -825,8 +903,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
(laneEntry.reqs zip laneInv.asBools).zipWithIndex (laneEntry.reqs zip laneInv.asBools).zipWithIndex
.foreach { case ((reqEntry, inv), i) => .foreach { case ((reqEntry, inv), i) =>
val req = reqQueues.io.elts(lane)(i) val req = reqQueues.io.elts(lane)(i)
when ((coalescer.io.invalidate.valid && inv)) { when((coalescer.io.invalidate.valid && inv)) {
printf(s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", req.source) printf(
s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n",
req.source
)
} }
reqEntry.valid := (coalescer.io.invalidate.valid && inv) reqEntry.valid := (coalescer.io.invalidate.valid && inv)
reqEntry.source := req.source reqEntry.source := req.source
@@ -845,22 +926,23 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
tlCoal.d.ready := uncoalescer.io.coalResp.ready tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Connect uncoalescer results back into each lane's response queue // Connect uncoalescer results back into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) => (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach {
perLaneResps.zipWithIndex.foreach { case (resp, i) => case ((q, perLaneResps), lane) =>
// TODO: rather than crashing, deassert tlOut.d.ready to stall downtream perLaneResps.zipWithIndex.foreach { case (resp, i) =>
// cache. This should ideally not happen though. // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
assert( // cache. This should ideally not happen though.
q.io.enq(respQueueUncoalPortOffset + i).ready, assert(
s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}" q.io.enq(respQueueUncoalPortOffset + i).ready,
) s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}"
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid )
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
// debug // debug
// when (resp.valid) { // when (resp.valid) {
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
// } // }
// dontTouch(q.io.enq(respQueueCoalPortOffset)) // dontTouch(q.io.enq(respQueueCoalPortOffset))
} }
} }
// Debug // Debug
@@ -972,7 +1054,8 @@ class Uncoalescer(config: CoalescerConfig) extends Module {
// split the coalesced response back to individual per-lane responses with the // split the coalesced response back to individual per-lane responses with the
// right metadata. // right metadata.
class InflightCoalReqTable(config: CoalescerConfig) extends Module { class InflightCoalReqTable(config: CoalescerConfig) extends Module {
val offsetBits = config.maxCoalLogSize - config.wordSizeWidth // assumes word offset val offsetBits =
config.maxCoalLogSize - config.wordSizeWidth // assumes word offset
val entryT = new InflightCoalReqTableEntry( val entryT = new InflightCoalReqTableEntry(
config.numLanes, config.numLanes,
config.queueDepth, config.queueDepth,
@@ -1019,7 +1102,7 @@ class InflightCoalReqTable(config: CoalescerConfig) extends Module {
} }
val full = Wire(Bool()) val full = Wire(Bool())
full := (0 until entries).map( table(_).valid ).reduce( _ && _ ) full := (0 until entries).map(table(_).valid).reduce(_ && _)
assert(!full, "inflight table is full and blocking coalescer") assert(!full, "inflight table is full and blocking coalescer")
dontTouch(full) dontTouch(full)
@@ -1094,8 +1177,12 @@ object TLUtils {
// `traceHasSource` is true if the input trace file has an additional source // `traceHasSource` is true if the input trace file has an additional source
// ID column. This is useful for using the output trace file genereated by // ID column. This is useful for using the output trace file genereated by
// MemTraceLogger as the driver. // MemTraceLogger as the driver.
class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false) class MemTraceDriver(
(implicit p: Parameters) extends LazyModule { config: CoalescerConfig,
filename: String,
traceHasSource: Boolean = false
)(implicit p: Parameters)
extends LazyModule {
// Create N client nodes together // Create N client nodes together
val laneNodes = Seq.tabulate(config.numLanes) { i => val laneNodes = Seq.tabulate(config.numLanes) { i =>
val clientParam = Seq( val clientParam = Seq(
@@ -1113,7 +1200,8 @@ class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource:
val node = TLIdentityNode() val node = TLIdentityNode()
laneNodes.foreach { l => node := l } laneNodes.foreach { l => node := l }
lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource) lazy val module =
new MemTraceDriverImp(this, config, filename, traceHasSource)
} }
trait HasTraceLine { trait HasTraceLine {
@@ -1136,9 +1224,12 @@ class TraceLine extends Bundle with HasTraceLine {
val data = UInt(64.W) val data = UInt(64.W)
} }
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String, class MemTraceDriverImp(
traceHasSource: Boolean) outer: MemTraceDriver,
extends LazyModuleImp(outer) config: CoalescerConfig,
filename: String,
traceHasSource: Boolean
) extends LazyModuleImp(outer)
with UnitTestModule { with UnitTestModule {
// Current cycle mark to read from trace // Current cycle mark to read from trace
val traceReadCycle = RegInit(1.U(64.W)) val traceReadCycle = RegInit(1.U(64.W))
@@ -1176,7 +1267,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
// Not all fire because trace cycle has to advance even when there is no valid // Not all fire because trace cycle has to advance even when there is no valid
// line in the trace. // line in the trace.
when (reqQueueAllReady){ when(reqQueueAllReady) {
traceReadCycle := traceReadCycle + 1.U traceReadCycle := traceReadCycle + 1.U
} }
@@ -1216,11 +1307,16 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
sizeInBytes := (1.U) << req.size sizeInBytes := (1.U) << req.size
mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U) mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data) wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedAddress =
req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.size) val wordAlignedSize = Mux(subword, 2.U, req.size)
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds), val sourceGen = Module(
ignoreInUse = false)) new RoundRobinSourceGenerator(
log2Ceil(config.numOldSrcIds),
ignoreInUse = false
)
)
sourceGen.io.gen := reqQ.io.deq.fire sourceGen.io.gen := reqQ.io.deq.fire
// assert(sourceGen.io.id.valid) // assert(sourceGen.io.id.valid)
@@ -1229,7 +1325,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
toAddress = hashToValidPhyAddr(wordAlignedAddress), toAddress = hashToValidPhyAddr(wordAlignedAddress),
lgSize = wordAlignedSize, // trace line already holds log2(size) lgSize = wordAlignedSize, // trace line already holds log2(size)
// data should be aligned to beatBytes // data should be aligned to beatBytes
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt data =
(wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
) )
val (glegal, gbits) = edge.Get( val (glegal, gbits) = edge.Get(
fromSource = sourceGen.io.id.bits, fromSource = sourceGen.io.id.bits,
@@ -1240,7 +1337,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
val bits = Mux(req.is_store, pbits, gbits) val bits = Mux(req.is_store, pbits, gbits)
tlOut.a.valid := (reqQ.io.deq.valid && sourceGen.io.id.valid) tlOut.a.valid := (reqQ.io.deq.valid && sourceGen.io.id.valid)
when (tlOut.a.valid) { when(tlOut.a.valid) {
assert(legal, "illegal TL req gen") assert(legal, "illegal TL req gen")
} }
tlOut.a.bits := bits tlOut.a.bits := bits
@@ -1288,9 +1385,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean) class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
extends BlackBox( extends BlackBox(
Map("FILENAME" -> filename, Map(
"NUM_LANES" -> numLanes, "FILENAME" -> filename,
"HAS_SOURCE" -> (if (traceHasSource) 1 else 0)) "NUM_LANES" -> numLanes,
"HAS_SOURCE" -> (if (traceHasSource) 1 else 0)
)
) )
with HasBlackBoxResource { with HasBlackBoxResource {
val traceLineT = new TraceLine val traceLineT = new TraceLine
@@ -1304,19 +1403,20 @@ class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
// These names have to match declarations in the Verilog code, eg. // These names have to match declarations in the Verilog code, eg.
// trace_read_address. // trace_read_address.
val trace_read = new Bundle { // can't use HasTraceLine because this doesn't have source val trace_read =
val ready = Input(Bool()) new Bundle { // can't use HasTraceLine because this doesn't have source
val valid = Output(UInt(numLanes.W)) val ready = Input(Bool())
// Chisel can't interface with Verilog 2D port, so flatten all lanes into val valid = Output(UInt(numLanes.W))
// single wide 1D array. // Chisel can't interface with Verilog 2D port, so flatten all lanes into
// TODO: assumes 64-bit address. // single wide 1D array.
val cycle = Input(UInt(64.W)) // TODO: assumes 64-bit address.
val address = Output(UInt((addrW * numLanes).W)) val cycle = Input(UInt(64.W))
val is_store = Output(UInt(numLanes.W)) val address = Output(UInt((addrW * numLanes).W))
val size = Output(UInt((sizeW * numLanes).W)) val is_store = Output(UInt(numLanes.W))
val data = Output(UInt((dataW * numLanes).W)) val size = Output(UInt((sizeW * numLanes).W))
val finished = Output(Bool()) val data = Output(UInt((dataW * numLanes).W))
} val finished = Output(Bool())
}
}) })
addResource("/vsrc/SimMemTrace.v") addResource("/vsrc/SimMemTrace.v")
@@ -1443,11 +1543,11 @@ class MemTraceLogger(
// This assert only holds true for PutFullData and not PutPartialData, // This assert only holds true for PutFullData and not PutPartialData,
// where HIGH bits in the mask may not be contiguous. // where HIGH bits in the mask may not be contiguous.
when (tlIn.a.valid) { when(tlIn.a.valid) {
assert( assert(
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
"mask HIGH popcount do not match the TL size. " + "mask HIGH popcount do not match the TL size. " +
"Partial masks are not allowed for PutFull" "Partial masks are not allowed for PutFull"
) )
} }
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
@@ -1476,17 +1576,25 @@ class MemTraceLogger(
// stats // stats
val numReqsThisCycle = val numReqsThisCycle =
laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce {
(v0, v1) => v0 + v1
}
val numRespsThisCycle = val numRespsThisCycle =
laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce {
(v0, v1) => v0 + v1
}
val reqBytesThisCycle = val reqBytesThisCycle =
laneReqs.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => laneReqs
b0 + b1 .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
} .reduce { (b0, b1) =>
b0 + b1
}
val respBytesThisCycle = val respBytesThisCycle =
laneResps.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => laneResps
b0 + b1 .map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
} .reduce { (b0, b1) =>
b0 + b1
}
numReqs := numReqs + numReqsThisCycle numReqs := numReqs + numReqsThisCycle
numResps := numResps + numRespsThisCycle numResps := numResps + numRespsThisCycle
reqBytes := reqBytes + reqBytesThisCycle reqBytes := reqBytes + reqBytesThisCycle
@@ -1496,7 +1604,10 @@ class MemTraceLogger(
// //
// This is a clunky workaround of the fact that Chisel doesn't allow partial // This is a clunky workaround of the fact that Chisel doesn't allow partial
// assignment to a bitfield range of a wide signal. // assignment to a bitfield range of a wide signal.
def flattenTrace(simIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = { def flattenTrace(
simIO: Bundle with HasTraceLine,
perLane: Vec[TraceLine]
) = {
// these will get optimized out // these will get optimized out
val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid))) val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid)))
val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source))) val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source)))
@@ -1592,8 +1703,14 @@ object TLPrintf {
tlData: UInt, tlData: UInt,
reqData: UInt reqData: UInt
) = { ) = {
printf(s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d", printf(
source, address, size, mask, is_store) s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d",
source,
address,
size,
mask,
is_store
)
when(is_store) { when(is_store) {
printf(", tlData=%x, reqData=%x", tlData, reqData) printf(", tlData=%x, reqData=%x", tlData, reqData)
} }
@@ -1604,7 +1721,7 @@ object TLPrintf {
// Synthesizable unit tests // Synthesizable unit tests
class DummyDriver(config: CoalescerConfig)(implicit p: Parameters) class DummyDriver(config: CoalescerConfig)(implicit p: Parameters)
extends LazyModule { extends LazyModule {
val laneNodes = Seq.tabulate(config.numLanes) { i => val laneNodes = Seq.tabulate(config.numLanes) { i =>
val clientParam = Seq( val clientParam = Seq(
TLMasterParameters.v1( TLMasterParameters.v1(
@@ -1640,7 +1757,10 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
// generate dummy traffic to coalescer to prevent it from being optimized // generate dummy traffic to coalescer to prevent it from being optimized
// out during synthesis // out during synthesis
val address = Wire(UInt(config.addressWidth.W)) val address = Wire(UInt(config.addressWidth.W))
address := Cat((finishCounter + (lane.U % 3.U)), 0.U(config.wordSizeWidth.W)) address := Cat(
(finishCounter + (lane.U % 3.U)),
0.U(config.wordSizeWidth.W)
)
val (tl, edge) = node.out(0) val (tl, edge) = node.out(0)
val (legal, bits) = edge.Put( val (legal, bits) = edge.Put(
fromSource = sourceIdCounter, fromSource = sourceIdCounter,
@@ -1657,11 +1777,13 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
tl.e.valid := false.B tl.e.valid := false.B
} }
val dataSum = outer.laneNodes.map { node => val dataSum = outer.laneNodes
val tl = node.out(0)._1 .map { node =>
val data = Mux(tl.d.valid, tl.d.bits.data, 0.U) val tl = node.out(0)._1
data val data = Mux(tl.d.valid, tl.d.bits.data, 0.U)
}.reduce (_ +& _) data
}
.reduce(_ +& _)
// this doesn't make much sense, but it prevents the entire uncoalescer from // this doesn't make much sense, but it prevents the entire uncoalescer from
// being optimized away // being optimized away
finishCounter := finishCounter + dataSum finishCounter := finishCounter + dataSum
@@ -1680,8 +1802,10 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule {
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << config.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth)
)
) )
) )
@@ -1704,7 +1828,8 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
} }
// tracedriver --> coalescer --> tracelogger --> tlram // tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends LazyModule { class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters)
extends LazyModule {
val numLanes = p(SIMTCoreKey).get.nLanes val numLanes = p(SIMTCoreKey).get.nLanes
val config = defaultConfig.copy(numLanes = numLanes) val config = defaultConfig.copy(numLanes = numLanes)
@@ -1713,14 +1838,18 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz
new MemTraceLogger(numLanes, filename, loggerName = "coreside") new MemTraceLogger(numLanes, filename, loggerName = "coreside")
) )
val coal = LazyModule(new CoalescingUnit(config)) val coal = LazyModule(new CoalescingUnit(config))
val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, filename, loggerName = "memside")) val memSideLogger = LazyModule(
new MemTraceLogger(numLanes + 1, filename, loggerName = "memside")
)
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
LazyModule( LazyModule(
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << config.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth)
)
) )
) )
@@ -1751,8 +1880,9 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz
} }
} }
class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit p: Parameters) class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit
extends UnitTest(timeout) { p: Parameters
) extends UnitTest(timeout) {
val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module) val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module)
dut.io.start := io.start dut.io.start := io.start
io.finished := dut.io.finished io.finished := dut.io.finished
@@ -1770,8 +1900,10 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << defaultConfig.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << defaultConfig.dataBusWidth)
)
) )
) )
@@ -1785,13 +1917,13 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
} }
} }
class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) { class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
extends UnitTest(timeout) {
val dut = Module(LazyModule(new TLRAMCoalescer).module) val dut = Module(LazyModule(new TLRAMCoalescer).module)
dut.io.start := io.start dut.io.start := io.start
io.finished := dut.io.finished io.finished := dut.io.finished
} }
//////////// ////////////
//////////// ////////////
//////////// ////////////
@@ -1941,11 +2073,3 @@ class CoalescerXbarImpl(outer: CoalescerXbar,
} }