Merge branch 'graphics' of https://github.com/hansungk/rocket-chip into graphics

This commit is contained in:
Richard Yan
2023-05-11 21:50:42 -07:00
2 changed files with 498 additions and 503 deletions

View File

@@ -4,7 +4,6 @@ package freechips.rocketchip.tilelink
import chisel3._ import chisel3._
import chisel3.util._ import chisel3.util._
import chisel3.experimental.ChiselEnum
import org.chipsalliance.cde.config.{Parameters, Field} import org.chipsalliance.cde.config.{Parameters, Field}
import freechips.rocketchip.diplomacy._ import freechips.rocketchip.diplomacy._
// import freechips.rocketchip.devices.tilelink.TLTestRAM // import freechips.rocketchip.devices.tilelink.TLTestRAM
@@ -42,6 +41,13 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
} }
} }
// Mapping to reference model param names
// numLanes: Int, <-> config.NUM_LANES
// numPerLaneReqs: Int, <-> config.DEPTH
// sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS)
// sizeWidth: Int, <-> config.sizeEnum.width
// coalDataWidth: Int, <-> (1 << config.MAX_SIZE)
// numInflightCoalRequests: Int <-> config.NUM_NEW_IDS
case class CoalescerConfig( case class CoalescerConfig(
enable: Boolean, // globally enable or disable coalescing enable: Boolean, // globally enable or disable coalescing
numLanes: Int, // number of lanes (or threads) in a warp numLanes: Int, // number of lanes (or threads) in a warp
@@ -135,7 +141,11 @@ class CoalescingUnit(config: CoalescerConfig)(implicit p: Parameters) extends La
lazy val module = new CoalescingUnitImp(this, config) lazy val module = new CoalescingUnitImp(this, config)
} }
class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int) extends Bundle { // Protocol-agnostic bundles that represent a request and a response to the
// coalescer.
class Request(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWidth: Int)
extends Bundle {
require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8") require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8")
val op = UInt(1.W) // 0=READ 1=WRITE val op = UInt(1.W) // 0=READ 1=WRITE
val address = UInt(addressWidth.W) val address = UInt(addressWidth.W)
@@ -149,7 +159,7 @@ class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWid
fromSource = this.source, fromSource = this.source,
toAddress = this.address, toAddress = this.address,
lgSize = this.size, lgSize = this.size,
data = this.data, data = this.data
) )
val (glegal, gbits) = edgeOut.Get( val (glegal, gbits) = edgeOut.Get(
fromSource = this.source, fromSource = this.source,
@@ -162,8 +172,24 @@ class ReqQueueEntry(sourceWidth: Int, sizeWidth: Int, addressWidth: Int, dataWid
bits bits
} }
} }
// Request bundle for an access that has not been coalesced, i.e. the shape of
// the entries held in the per-lane request queues.  All field widths are
// derived from `config`: `source` is sized for `numOldSrcIds` source IDs,
// and `size`/`data` are sized for a single word.
case class NonCoalescedRequest(config: CoalescerConfig)
    extends Request(
      log2Ceil(config.numOldSrcIds), // sourceWidth
      config.wordSizeWidth, // sizeWidth
      config.addressWidth, // addressWidth
      8 * config.wordSizeInBytes // dataWidth, in bits
    )
// Request bundle for a coalesced access.  All field widths are derived from
// `config`: `source` is sized for `numNewSrcIds` source IDs and `data` spans
// the largest coalesced chunk, (1 << maxCoalLogSize) bytes.
case class CoalescedRequest(config: CoalescerConfig)
    extends Request(
      sourceWidth = log2Ceil(config.numNewSrcIds),
      // `size` is used as a log2 byte count (it is passed as `lgSize` when the
      // TileLink message is built), so it has to be able to hold the value
      // maxCoalLogSize itself, i.e. maxCoalLogSize + 1 distinct values.
      // log2Ceil(maxCoalLogSize) is one bit short whenever maxCoalLogSize is a
      // power of two (e.g. 4 -> 2 bits, which cannot encode 4).  For every
      // other value the resulting width is unchanged.
      sizeWidth = log2Ceil(config.maxCoalLogSize + 1),
      addressWidth = config.addressWidth,
      dataWidth = 8 * (1 << config.maxCoalLogSize)
    )
class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends Bundle { class Response(sourceWidth: Int, sizeWidth: Int, dataWidth: Int)
extends Bundle {
require(dataWidth % 8 == 0, s"dataWidth (${dataWidth} bits) is not multiple of 8")
val op = UInt(1.W) // 0=READ 1=WRITE val op = UInt(1.W) // 0=READ 1=WRITE
val size = UInt(sizeWidth.W) val size = UInt(sizeWidth.W)
val source = UInt(sourceWidth.W) val source = UInt(sourceWidth.W)
@@ -191,10 +217,23 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, dataWidth: Int) extends B
this.error := bundle.denied this.error := bundle.denied
} }
} }
// Response bundle for an access that was not coalesced.  All field widths are
// derived from `config`: `source` is sized for `numOldSrcIds` source IDs,
// and `size`/`data` are sized for a single word.
case class NonCoalescedResponse(config: CoalescerConfig)
    extends Response(
      log2Ceil(config.numOldSrcIds), // sourceWidth
      config.wordSizeWidth, // sizeWidth
      8 * config.wordSizeInBytes // dataWidth, in bits
    )
// Response bundle for a coalesced access.  All field widths are derived from
// `config`: `source` is sized for `numNewSrcIds` source IDs and `data` spans
// the largest coalesced chunk, (1 << maxCoalLogSize) bytes.
case class CoalescedResponse(config: CoalescerConfig)
    extends Response(
      sourceWidth = log2Ceil(config.numNewSrcIds),
      // NOTE(review): assumes `size` carries a log2 byte count in the range
      // 0..maxCoalLogSize, like the request side -- confirm against fromTLD.
      // Holding maxCoalLogSize itself needs log2Ceil(maxCoalLogSize + 1) bits;
      // log2Ceil(maxCoalLogSize) is one bit short whenever maxCoalLogSize is a
      // power of two (e.g. 4 -> 2 bits, which cannot encode 4).  For every
      // other value the resulting width is unchanged.
      sizeWidth = log2Ceil(config.maxCoalLogSize + 1),
      dataWidth = 8 * (1 << config.maxCoalLogSize)
    )
// If `ignoreInUse`, just keep giving out new IDs without checking if it is in // If `ignoreInUse`, just keep giving out new IDs without checking if it is in
// use. // use.
class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module { class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true)
extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val gen = Input(Bool()) val gen = Input(Bool())
val reclaim = Input(Valid(UInt(sourceWidth.W))) val reclaim = Input(Valid(UInt(sourceWidth.W)))
@@ -221,7 +260,8 @@ class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) e
} }
} }
class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module { class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig)
extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val queue = new Bundle { val queue = new Bundle {
val enq = Vec(config.numLanes, DeqIO(gen.cloneType)) val enq = Vec(config.numLanes, DeqIO(gen.cloneType))
@@ -238,7 +278,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// eltPrototype.valid := false.B // eltPrototype.valid := false.B
val elts = Reg(Vec(config.numLanes, Vec(entries, Valid(gen)))) val elts = Reg(Vec(config.numLanes, Vec(entries, Valid(gen))))
val writePtr = RegInit(VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W)))) val writePtr = RegInit(
VecInit(Seq.fill(config.numLanes)(0.asUInt(log2Ceil(entries + 1).W)))
)
val deqDone = RegInit(VecInit(Seq.fill(config.numLanes)(false.B))) val deqDone = RegInit(VecInit(Seq.fill(config.numLanes)(false.B)))
private def resetElts = { private def resetElts = {
@@ -265,14 +307,17 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// current cycle. // current cycle.
// //
// shift hint is when the heads have no more coalescable left this or next cycle // shift hint is when the heads have no more coalescable left this or next cycle
val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0))).map { case (c, inv) => val shiftHint = !(io.coalescable zip io.invalidate.bits.map(_(0)))
.map { case (c, inv) =>
c && !(io.invalidate.valid && inv) c && !(io.invalidate.valid && inv)
}.reduce(_ || _) }
.reduce(_ || _)
val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _) val syncedEnqValid = io.queue.enq.map(_.valid).reduce(_ || _)
// valid && !fire means we enable enqueueing to a full queue, provided the // valid && !fire means we enable enqueueing to a full queue, provided the
// arbiter is taking away all remaining valid queue heads in the next cycle so // arbiter is taking away all remaining valid queue heads in the next cycle so
// that we make space for the entire next warp. // that we make space for the entire next warp.
val syncedDeqValidNextCycle = io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _) val syncedDeqValidNextCycle =
io.queue.deq.map(x => x.valid && !x.ready).reduce(_ || _)
for (i <- 0 until config.numLanes) { for (i <- 0 until config.numLanes) {
val enq = io.queue.enq(i) val enq = io.queue.enq(i)
@@ -299,7 +344,9 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
elt.valid := false.B elt.valid := false.B
} else { } else {
elt.bits := elts(i)(j + 1).bits elt.bits := elts(i)(j + 1).bits
elt.valid := elts(i)(j + 1).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1)) elt.valid := elts(i)(
j + 1
).valid && !(io.invalidate.valid && io.invalidate.bits(i)(j + 1))
} }
} }
// reset dequeue mask when new entries are shifted in // reset dequeue mask when new entries are shifted in
@@ -331,7 +378,8 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
// When doing spatial-only coalescing, queues should never drift from each // When doing spatial-only coalescing, queues should never drift from each
// other, i.e. the queue heads should always contain mem requests from the // other, i.e. the queue heads should always contain mem requests from the
// same instruction. // same instruction.
val queueInSync = controlSignals.map(_ === controlSignals.head).reduce(_ && _) && val queueInSync =
controlSignals.map(_ === controlSignals.head).reduce(_ && _) &&
writePtr.map(_ === writePtr.head).reduce(_ && _) writePtr.map(_ === writePtr.head).reduce(_ && _)
assert(queueInSync, "shift queue lanes are not in sync") assert(queueInSync, "shift queue lanes are not in sync")
@@ -340,18 +388,23 @@ class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) e
} }
// Software model: coalescer.py // Software model: coalescer.py
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], class MonoCoalescer(
config: CoalescerConfig) extends Module { config: CoalescerConfig,
coalLogSize: Int,
queueT: CoalShiftQueue[NonCoalescedRequest]
) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val window = Input(windowT.io.cloneType) val window = Input(queueT.io.cloneType)
val results = Output(new Bundle { val results = Output(new Bundle {
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
val baseAddr = Output(UInt(config.addressWidth.W)) val baseAddr = Output(UInt(config.addressWidth.W))
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W))) val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
// number of entries matched with this leader lane's head. // number of entries matched with this leader lane's head.
// maximum is numLanes * queueDepth // maximum is numLanes * queueDepth
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) val matchCount =
val coverageHits = Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W)) Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W))
val coverageHits =
Output(UInt((config.maxCoalLogSize - config.wordSizeWidth + 1).W))
val canCoalesce = Output(Vec(config.numLanes, Bool())) val canCoalesce = Output(Vec(config.numLanes, Bool()))
}) })
}) })
@@ -366,8 +419,12 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
def printQueueHeads = { def printQueueHeads = {
leaders.zipWithIndex.foreach { case (head, i) => leaders.zipWithIndex.foreach { case (head, i) =>
printf(s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n", printf(
leadersValid(i), head.source, head.address) s"ReqQueueEntry[${i}].head = v:%d, source:%d, addr:%x\n",
leadersValid(i),
head.source,
head.address
)
} }
} }
// when (leadersValid.reduce(_ || _)) { // when (leadersValid.reduce(_ || _)) {
@@ -376,7 +433,7 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
val size = coalLogSize val size = coalLogSize
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = { def canMatch(req0: Request, req0v: Bool, req1: Request, req1v: Bool): Bool = {
(req0.op === req1.op) && (req0.op === req1.op) &&
(req0v && req1v) && (req0v && req1v) &&
((req0.address & this.addrMask) === (req1.address & this.addrMask)) ((req0.address & this.addrMask) === (req1.address & this.addrMask))
@@ -385,10 +442,13 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
// Gives a 2-D table of Bools representing match at every queue entry, // Gives a 2-D table of Bools representing match at every queue entry,
// for each lane (so 3-D in total). // for each lane (so 3-D in total).
// dimensions: (leader lane, follower lane, follower entry) // dimensions: (leader lane, follower lane, follower entry)
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) => val matchTablePerLane = (leaders zip leadersValid).map {
(io.window.elts zip io.window.mask).map { case (followers, followerValids) => case (leader, leaderValid) =>
(io.window.elts zip io.window.mask).map {
case (followers, followerValids) =>
// compare leader's head against follower's every queue entry // compare leader's head against follower's every queue entry
(followers zip followerValids.asBools).map { case (follower, followerValid) => (followers zip followerValids.asBools).map {
case (follower, followerValid) =>
canMatch(follower, followerValid, leader, leaderValid) canMatch(follower, followerValid, leader, leaderValid)
// FIXME: disabling halving optimization because it does not give the // FIXME: disabling halving optimization because it does not give the
// correct per-lane coalescable indication to the shift queue // correct per-lane coalescable indication to the shift queue
@@ -401,18 +461,23 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
} }
val matchCounts = matchTablePerLane.map(table => val matchCounts = matchTablePerLane.map(table =>
table.map(PopCount(_)) // sum up each column table
.reduce(_ +& _)) .map(PopCount(_)) // sum up each column
.reduce(_ +& _)
)
val canCoalesce = matchCounts.map(_ > 1.U) val canCoalesce = matchCounts.map(_ > 1.U)
// Elect the leader that has the most match counts. // Elect the leader that has the most match counts.
// TODO: potentially expensive: magnitude comparator // TODO: potentially expensive: magnitude comparator
def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = { def chooseLeaderArgMax(matchCounts: Seq[UInt]): UInt = {
matchCounts.zipWithIndex.map { matchCounts.zipWithIndex
case (c, i) => (c, i.U) .map { case (c, i) =>
}.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) => (c, i.U)
}
.reduce[(UInt, UInt)] { case ((c0, i), (c1, j)) =>
(Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j)) (Mux(c0 >= c1, c0, c1), Mux(c0 >= c1, i, j))
}._2 }
._2
} }
// Elect leader by choosing the smallest-index lane that has a valid // Elect leader by choosing the smallest-index lane that has a valid
// match, i.e. using priority encoder. // match, i.e. using priority encoder.
@@ -422,7 +487,8 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts) val chosenLeaderIdx = chooseLeaderPriorityEncoder(matchCounts)
val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux val chosenLeader = VecInit(leaders)(chosenLeaderIdx) // mux
// matchTable for the chosen lane, but converted to a Vec[UInt] // matchTable for the chosen lane, but each column converted to bitflags,
// i.e. Vec[UInt]
val chosenMatches = VecInit(matchTablePerLane.map { table => val chosenMatches = VecInit(matchTablePerLane.map { table =>
VecInit(table.map(VecInit(_).asUInt)) VecInit(table.map(VecInit(_).asUInt))
})(chosenLeaderIdx) })(chosenLeaderIdx)
@@ -431,14 +497,17 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
// coverage calculation // coverage calculation
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth) def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordSizeWidth)
// 2-D table flattened to 1-D // 2-D table flattened to 1-D
val offsets = io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address))) val offsets =
io.window.elts.flatMap(_.map(req => getOffsetSlice(req.address)))
val valids = chosenMatches.flatMap(_.asBools) val valids = chosenMatches.flatMap(_.asBools)
// indicates for each word in the coalesced chunk whether it is accessed by // indicates for each word in the coalesced chunk whether it is accessed by
// any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four // any of the requests in the queue. e.g. if [ 1 1 1 1 ], all of the four
// words in the coalesced data coming back will be accessed by some request // words in the coalesced data coming back will be accessed by some request
// and we've reached 100% bandwidth utilization. // and we've reached 100% bandwidth utilization.
val hits = Seq.tabulate(1 << (size - config.wordSizeWidth)) { target => val hits = Seq.tabulate(1 << (size - config.wordSizeWidth)) { target =>
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _) (offsets zip valids)
.map { case (offset, valid) => valid && (offset === target.U) }
.reduce(_ || _)
} }
// debug prints // debug prints
@@ -471,20 +540,28 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
// coalesced request out of all possible combinations. // coalesced request out of all possible combinations.
// //
// Software model: coalescer.py // Software model: coalescer.py
class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry, class MultiCoalescer(
config: CoalescerConfig) extends Module { config: CoalescerConfig,
queueT: CoalShiftQueue[NonCoalescedRequest],
coalReqT: Request,
) extends Module {
val invalidateT = Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))
val io = IO(new Bundle { val io = IO(new Bundle {
// coalescing window, connected to the contents of the request queues // coalescing window, connected to the contents of the request queues
val window = Input(windowT.io.cloneType) val window = Input(queueT.io.cloneType)
// generated coalesced request // generated coalesced request
val coalReq = DecoupledIO(coalReqT.cloneType) val coalReq = DecoupledIO(coalReqT.cloneType)
// invalidate signals going into each request queue's head // invalidate signals going into each request queue's head. Lanes with
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) // high invalidate bits are what became coalesced into the new request.
// whether a lane is coalescable val invalidate = Output(invalidateT)
// whether a lane is coalescable. This is used to output non-coalescable
// lanes to the arbiter so they can be flushed to downstream.
val coalescable = Output(Vec(config.numLanes, Bool())) val coalescable = Output(Vec(config.numLanes, Bool()))
}) })
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) val coalescers = config.coalLogSizes.map(size =>
Module(new MonoCoalescer(config, size, queueT))
)
coalescers.foreach(_.io.window := io.window) coalescers.foreach(_.io.window := io.window)
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = { def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
@@ -509,7 +586,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W)) val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
val chosenValid = Wire(Bool()) val chosenValid = Wire(Bool())
// minimum 25% coverage // minimum 25% coverage
val minCoverage = 1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2)) val minCoverage =
1.max(1 << ((config.maxCoalLogSize - config.wordSizeWidth) - 2))
when(normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { when(normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
chosenSizeIdx := argMax(normalizedHits) chosenSizeIdx := argMax(normalizedHits)
@@ -541,9 +619,14 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
val flatMatches = chosenBundle.matchOH.flatMap(_.asBools) val flatMatches = chosenBundle.matchOH.flatMap(_.asBools)
// check for word alignment in addresses // check for word alignment in addresses
assert(io.window.elts.flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U)).zip( assert(
io.window.mask.flatMap(_.asBools)).map { case (aligned, valid) => (!valid) || aligned }.reduce(_ || _), io.window.elts
"one or more addresses used for coalescing is not word-aligned") .flatMap(_.map(req => req.address(config.wordSizeWidth - 1, 0) === 0.U))
.zip(io.window.mask.flatMap(_.asBools))
.map { case (aligned, valid) => (!valid) || aligned }
.reduce(_ || _),
"one or more addresses used for coalescing is not word-aligned"
)
// note: this is word-level coalescing. if finer granularity is needed, need to modify code // note: this is word-level coalescing. if finer granularity is needed, need to modify code
val numWords = (1.U << (chosenSize - config.wordSizeWidth.U)).asUInt val numWords = (1.U << (chosenSize - config.wordSizeWidth.U)).asUInt
@@ -558,18 +641,29 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
val sel = flatReqs.zip(flatMatches).map { case (req, m) => val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
// note: ANDing against addrMask is to conform to active byte lanes requirements // note: ANDing against addrMask is to conform to active byte lanes requirements
// if aligning to LSB suffices, we should add the bitwise AND back // if aligning to LSB suffices, we should add the bitwise AND back
m && ((req.address(config.maxCoalLogSize - 1, config.wordSizeWidth)/* & addrMask*/) === i.U) m && ((req.address(
config.maxCoalLogSize - 1,
config.wordSizeWidth
) /* & addrMask*/ ) === i.U)
} }
// TODO: SW uses priority encoder, not sure about behavior of MuxCase // TODO: SW uses priority encoder, not sure about behavior of MuxCase
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) => data(i) := MuxCase(
DontCare,
flatReqs.zip(sel).map { case (req, s) =>
s -> req.data s -> req.data
}) }
mask(i) := MuxCase(0.U, flatReqs.zip(sel).map { case (req, s) => )
mask(i) := MuxCase(
0.U,
flatReqs.zip(sel).map { case (req, s) =>
s -> req.mask s -> req.mask
}) }
)
} }
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))) val sourceGen = Module(
new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))
)
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
sourceGen.io.reclaim.valid := false.B // not used sourceGen.io.reclaim.valid := false.B // not used
sourceGen.io.reclaim.bits := DontCare // not used sourceGen.io.reclaim.bits := DontCare // not used
@@ -587,7 +681,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
io.invalidate.bits := chosenBundle.matchOH io.invalidate.bits := chosenBundle.matchOH
io.invalidate.valid := io.coalReq.fire // invalidate only when fire io.invalidate.valid := io.coalReq.fire // invalidate only when fire
io.coalescable := coalescers.map(_.io.results.canCoalesce.asUInt).reduce(_ | _).asBools io.coalescable := coalescers
.map(_.io.results.canCoalesce.asUInt)
.reduce(_ | _)
.asBools
dontTouch(io.invalidate) // debug dontTouch(io.invalidate) // debug
@@ -599,26 +696,36 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
if (!config.enable) disable if (!config.enable) disable
} }
class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends LazyModuleImp(outer) { class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig)
require(outer.cpuNode.in.length == config.numLanes, extends LazyModuleImp(outer) {
require(
outer.cpuNode.in.length == config.numLanes,
s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as " + s"number of incoming edges (${outer.cpuNode.in.length}) is not the same as " +
s"config.numLanes (${config.numLanes})") s"config.numLanes (${config.numLanes})"
require(outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds), )
require(
outer.cpuNode.in.head._1.params.sourceBits == log2Ceil(config.numOldSrcIds),
s"TL param sourceBits (${outer.cpuNode.in.head._1.params.sourceBits}) " + s"TL param sourceBits (${outer.cpuNode.in.head._1.params.sourceBits}) " +
s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})") s"mismatch with log2(config.numOldSrcIds) (${log2Ceil(config.numOldSrcIds)})"
require(outer.cpuNode.in.head._1.params.addressBits == config.addressWidth, )
require(
outer.cpuNode.in.head._1.params.addressBits == config.addressWidth,
s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " + s"TL param addressBits (${outer.cpuNode.in.head._1.params.addressBits}) " +
s"mismatch with config.addressWidth (${config.addressWidth})") s"mismatch with config.addressWidth (${config.addressWidth})"
)
require(
config.maxCoalLogSize <= config.dataBusWidth,
"multi-beat coalesced reads/writes are currently not supported"
)
val sourceWidth = outer.cpuNode.in.head._1.params.sourceBits val oldSourceWidth = outer.cpuNode.in.head._1.params.sourceBits
// note we are using word size. assuming all coalescer inputs are word sized val nonCoalReqT = new NonCoalescedRequest(config)
val reqQueueEntryT = new ReqQueueEntry(sourceWidth, config.wordSizeWidth, val reqQueues = Module(
config.addressWidth, (config.wordSizeInBytes * 8)) new CoalShiftQueue(nonCoalReqT, config.queueDepth, config)
val reqQueues = Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth, config)) )
val coalReqT = new ReqQueueEntry(log2Ceil(config.numNewSrcIds), log2Ceil(config.maxCoalLogSize), val coalReqT = new CoalescedRequest(config)
config.addressWidth, (1 << config.maxCoalLogSize) * 8) val coalescer = Module(new MultiCoalescer(config, reqQueues, coalReqT))
val coalescer = Module(new MultiCoalescer(reqQueues, coalReqT, config))
coalescer.io.window := reqQueues.io coalescer.io.window := reqQueues.io
reqQueues.io.coalescable := coalescer.io.coalescable reqQueues.io.coalescable := coalescer.io.coalescable
reqQueues.io.invalidate := coalescer.io.invalidate reqQueues.io.invalidate := coalescer.io.invalidate
@@ -634,7 +741,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
(outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach { (outer.cpuNode.in zip outer.cpuNode.out).zipWithIndex.foreach {
case (((tlIn, _), (tlOut, edgeOut)), lane) => case (((tlIn, _), (tlOut, edgeOut)), lane) =>
// Request queue // Request queue
val req = Wire(reqQueueEntryT) val req = Wire(nonCoalReqT)
req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode) req.op := TLUtils.AOpcodeIsStore(tlIn.a.bits.opcode)
req.source := tlIn.a.bits.source req.source := tlIn.a.bits.source
@@ -691,6 +798,11 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
tlCoal.e.valid := false.B tlCoal.e.valid := false.B
require(
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
+ s" (${(1 << config.dataBusWidth) * 8})"
)
// =========================================================================== // ===========================================================================
// Response flow // Response flow
@@ -703,8 +815,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth). // coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
val numPerLaneReqs = config.queueDepth val numPerLaneReqs = config.queueDepth
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), // FIXME: no need to contain maxCoalLogSize data
(1 << config.maxCoalLogSize) * 8) val respQueueEntryT = new Response(
oldSourceWidth,
log2Ceil(config.maxCoalLogSize),
(1 << config.maxCoalLogSize) * 8
)
val respQueues = Seq.tabulate(config.numLanes) { _ => val respQueues = Seq.tabulate(config.numLanes) { _ =>
Module( Module(
new MultiPortQueue( new MultiPortQueue(
@@ -776,57 +892,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(tlOut.d) dontTouch(tlOut.d)
} }
// Construct new entry for the inflight table val uncoalescer = Module(
// FIXME: don't instantiate inflight table entry type here. It leaks the table's impl new Uncoalescer(config, nonCoalReqT, coalReqT)
// detail to the coalescer
// richard: I think a good idea is to pass Valid[ReqQueueEntry] generated by
// the coalescer directly into the uncoalescer, so that we can offload the
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
// this also reduces top level clutter.
val uncoalescer = Module(new Uncoalescer(config))
val newEntry = Wire(uncoalescer.inflightTable.entryT)
newEntry.source := coalescer.io.coalReq.bits.source
assert (config.maxCoalLogSize <= config.dataBusWidth,
"multi-beat coalesced reads/writes are currently not supported")
assert (
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
+ s" (${(1 << config.dataBusWidth) * 8})"
) )
// connect coalesced request that is newly generated and being recorded in
// the uncoalescer
uncoalescer.io.coalReq <> coalescer.io.coalReq
uncoalescer.io.invalidate := coalescer.io.invalidate
val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits) val reqQueueHeads = reqQueues.io.queue.deq.map(_.bits)
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the uncoalescer.io.windowElts := reqQueues.io.elts
// coalescer to every (numLanes * queueDepth) entry in the inflight table. // connect coalesced response going into the uncoalescer, ready to be
(newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex // uncoalesced
.foreach { case ((laneEntry, laneInv), lane) =>
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
.foreach { case ((reqEntry, inv), i) =>
val req = reqQueues.io.elts(lane)(i)
when ((coalescer.io.invalidate.valid && inv)) {
printf(s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n", req.source)
}
reqEntry.valid := (coalescer.io.invalidate.valid && inv)
reqEntry.source := req.source
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordSizeWidth)
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
// TODO: load/store op
}
}
dontTouch(newEntry)
uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
uncoalescer.io.newEntry := newEntry
// Cleanup: custom <>? // Cleanup: custom <>?
uncoalescer.io.coalResp.valid := tlCoal.d.valid uncoalescer.io.coalResp.valid := tlCoal.d.valid
uncoalescer.io.coalResp.bits.source := tlCoal.d.bits.source uncoalescer.io.coalResp.bits.fromTLD(tlCoal.d.bits)
uncoalescer.io.coalResp.bits.data := tlCoal.d.bits.data // uncoalescer backpressure
tlCoal.d.ready := uncoalescer.io.coalResp.ready tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Connect uncoalescer results back into each lane's response queue // Connect uncoalescer results back into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) => (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach {
case ((q, perLaneResps), lane) =>
perLaneResps.zipWithIndex.foreach { case (resp, i) => perLaneResps.zipWithIndex.foreach { case (resp, i) =>
// TODO: rather than crashing, deassert tlOut.d.ready to stall downstream // TODO: rather than crashing, deassert tlOut.d.ready to stall downstream
// cache. This should ideally not happen though. // cache. This should ideally not happen though.
@@ -853,67 +938,78 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
dontTouch(tlCoal.d) dontTouch(tlCoal.d)
} }
// Protocol-agnostic bundle that represents a coalesced response. class Uncoalescer(
// config: CoalescerConfig,
// Having this makes it easier to: nonCoalReqT: NonCoalescedRequest,
// * do unit tests -- no need to deal with TileLink in the chiseltest code coalReqT: CoalescedRequest,
// * adapt coalescer to custom protocols like a custom L1 cache interface. ) extends Module {
//
// FIXME: overlaps with RespQueueEntry. Trait-ify
// Payload of a coalesced response: the source ID it was issued under plus the
// full coalesced data block.
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
  // Wide enough to index any of the config.numNewSrcIds source IDs.
  val source = UInt(log2Ceil(config.numNewSrcIds).W)
  // (1 << maxCoalLogSize) bytes at 8 bits per byte.
  val data = UInt(((1 << config.maxCoalLogSize) * 8).W)

  // Drives both fields of this bundle from a TileLink D-channel bundle.
  def fromTLD(bundle: TLBundleD): Unit = {
    source := bundle.source
    data := bundle.data
  }
}
class Uncoalescer(config: CoalescerConfig) extends Module {
// notes to hansung:
// val numLanes: Int, <-> config.NUM_LANES
// val numPerLaneReqs: Int, <-> config.DEPTH
// val sourceWidth: Int, <-> log2ceil(config.NUM_OLD_IDS)
// val sizeWidth: Int, <-> config.sizeEnum.width
// val coalDataWidth: Int, <-> (1 << config.MAX_SIZE)
// val numInflightCoalRequests: Int <-> config.NUM_NEW_IDS
val inflightTable = Module(new InflightCoalReqTable(config)) val inflightTable = Module(new InflightCoalReqTable(config))
val io = IO(new Bundle { val io = IO(new Bundle {
val coalReqValid = Input(Bool()) // generated coalesced request, connected to the output of the coalescer.
// FIXME: receive ReqQueueEntry and construct newEntry inside uncoalescer val coalReq = Flipped(DecoupledIO(coalReqT.cloneType))
val newEntry = Input(inflightTable.entryT.cloneType) // invalidate signal coming out of coalescer.
val coalResp = Flipped(Decoupled(new CoalescedResponseBundle(config))) val invalidate = Input(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
// coalescing window, connected to the contents of the request queues.
// Uncoalescer looks at the queue entries that got coalesced into `coalReq`
// in order to record which lanes this coalReq originally came from.
// We only care about window.elts because the coalescer would have made
// sure it only looked at the valid entries.
// TODO: duplicate type construction
val windowElts = Input(Vec(config.numLanes, Vec(config.queueDepth, nonCoalReqT)))
val coalResp = Flipped(Decoupled(new CoalescedResponse(config)))
val uncoalResps = Output( val uncoalResps = Output(
Vec( Vec(
config.numLanes, config.numLanes,
Vec( Vec(config.queueDepth, ValidIO(new NonCoalescedResponse(config)))
config.queueDepth,
ValidIO(
new RespQueueEntry(log2Ceil(config.numOldSrcIds), config.wordSizeWidth,
config.wordSizeInBytes * 8)
)
)
) )
) )
}) })
// Populate inflight table // Uncoalescer has to be always ready to accept and record new coalesced
inflightTable.io.enq.valid := io.coalReqValid // requests, so that it doesn't stall the coalescer.
inflightTable.io.enq.bits := io.newEntry io.coalReq.ready := true.B
// Construct a new entry for the inflight table using generated coalesced request
// Builds the inflight-table entry for the coalesced request on io.coalReq.
// The entry carries the coalesced source ID and, for every
// (lane, queue slot) pair, whether that slot was marked by io.invalidate
// together with the source/offset/size of the request sitting in that slot
// of io.windowElts.
def generateInflightTableEntry: InflightCoalReqTableEntry = {
  val newEntry = Wire(inflightTable.entryT)
  newEntry.source := io.coalReq.bits.source

  // Walk the per-lane invalidate masks alongside the per-lane entry slots.
  val lanePairs = (newEntry.lanes zip io.invalidate.bits).zipWithIndex
  for (((laneSlots, laneMask), lane) <- lanePairs) {
    // One mask bit per queue slot of this lane.
    val slotPairs = (laneSlots.reqs zip laneMask.asBools).zipWithIndex
    for (((slot, maskBit), i) <- slotPairs) {
      val windowReq = io.windowElts(lane)(i)
      // A slot is recorded only when the invalidate signal is valid and its
      // mask bit is set.
      val slotTaken = io.invalidate.valid && maskBit
      when(slotTaken) {
        printf(
          s"coalescer: reqQueue($lane)($i) got invalidated (source=%d)\n",
          windowReq.source
        )
      }
      slot.valid := slotTaken
      slot.source := windowReq.source
      // Address within the coalesced block, shifted right by wordSizeWidth.
      val inBlockAddr = windowReq.address % (1 << config.maxCoalLogSize).U
      slot.offset := (inBlockAddr >> config.wordSizeWidth)
      slot.sizeEnum := config.sizeEnum.logSizeToEnum(windowReq.size)
      // TODO: load/store op
    }
  }

  // Enqueue and lookup of the same source ID in one cycle is unsupported.
  val enqLookupClash =
    io.coalReq.valid && io.coalResp.valid &&
      (newEntry.source === io.coalResp.bits.source)
  assert(
    !enqLookupClash,
    "inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
  )

  dontTouch(newEntry)
  newEntry
}
inflightTable.io.enq.valid := io.coalReq.valid
inflightTable.io.enq.bits := generateInflightTableEntry
// Look up the table with incoming coalesced responses // Look up the table with incoming coalesced responses
inflightTable.io.lookup.ready := io.coalResp.valid inflightTable.io.lookup.ready := io.coalResp.valid
inflightTable.io.lookupSourceId := io.coalResp.bits.source inflightTable.io.lookupSourceId := io.coalResp.bits.source
io.coalResp.ready := true.B // FIXME, see sw model implementation io.coalResp.ready := true.B // FIXME, see sw model implementation
assert(
!((io.coalReqValid === true.B) && (io.coalResp.valid === true.B) &&
(io.newEntry.source === io.coalResp.bits.source)),
"inflight table: enqueueing and looking up the same srcId at the same cycle is not handled"
)
// Un-coalescing logic // Un-coalescing logic
// //
def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = { def getCoalescedDataChunk(data: UInt, dataWidth: Int, offset: UInt, logSize: UInt): UInt = {
@@ -972,7 +1068,8 @@ class Uncoalescer(config: CoalescerConfig) extends Module {
// split the coalesced response back to individual per-lane responses with the // split the coalesced response back to individual per-lane responses with the
// right metadata. // right metadata.
class InflightCoalReqTable(config: CoalescerConfig) extends Module { class InflightCoalReqTable(config: CoalescerConfig) extends Module {
val offsetBits = config.maxCoalLogSize - config.wordSizeWidth // assumes word offset val offsetBits =
config.maxCoalLogSize - config.wordSizeWidth // assumes word offset
val entryT = new InflightCoalReqTableEntry( val entryT = new InflightCoalReqTableEntry(
config.numLanes, config.numLanes,
config.queueDepth, config.queueDepth,
@@ -1094,8 +1191,12 @@ object TLUtils {
// `traceHasSource` is true if the input trace file has an additional source // `traceHasSource` is true if the input trace file has an additional source
// ID column. This is useful for using the output trace file generated by // ID column. This is useful for using the output trace file generated by
// MemTraceLogger as the driver. // MemTraceLogger as the driver.
class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false) class MemTraceDriver(
(implicit p: Parameters) extends LazyModule { config: CoalescerConfig,
filename: String,
traceHasSource: Boolean = false
)(implicit p: Parameters)
extends LazyModule {
// Create N client nodes together // Create N client nodes together
val laneNodes = Seq.tabulate(config.numLanes) { i => val laneNodes = Seq.tabulate(config.numLanes) { i =>
val clientParam = Seq( val clientParam = Seq(
@@ -1113,7 +1214,8 @@ class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource:
val node = TLIdentityNode() val node = TLIdentityNode()
laneNodes.foreach { l => node := l } laneNodes.foreach { l => node := l }
lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource) lazy val module =
new MemTraceDriverImp(this, config, filename, traceHasSource)
} }
trait HasTraceLine { trait HasTraceLine {
@@ -1136,9 +1238,12 @@ class TraceLine extends Bundle with HasTraceLine {
val data = UInt(64.W) val data = UInt(64.W)
} }
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String, class MemTraceDriverImp(
traceHasSource: Boolean) outer: MemTraceDriver,
extends LazyModuleImp(outer) config: CoalescerConfig,
filename: String,
traceHasSource: Boolean
) extends LazyModuleImp(outer)
with UnitTestModule { with UnitTestModule {
// Current cycle mark to read from trace // Current cycle mark to read from trace
val traceReadCycle = RegInit(1.U(64.W)) val traceReadCycle = RegInit(1.U(64.W))
@@ -1216,11 +1321,16 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
sizeInBytes := (1.U) << req.size sizeInBytes := (1.U) << req.size
mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U) mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data) wordData := Mux(subword, req.data << (offsetInWord * 8.U), req.data)
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedAddress =
req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.size) val wordAlignedSize = Mux(subword, 2.U, req.size)
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds), val sourceGen = Module(
ignoreInUse = false)) new RoundRobinSourceGenerator(
log2Ceil(config.numOldSrcIds),
ignoreInUse = false
)
)
sourceGen.io.gen := reqQ.io.deq.fire sourceGen.io.gen := reqQ.io.deq.fire
// assert(sourceGen.io.id.valid) // assert(sourceGen.io.id.valid)
@@ -1229,7 +1339,8 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
toAddress = hashToValidPhyAddr(wordAlignedAddress), toAddress = hashToValidPhyAddr(wordAlignedAddress),
lgSize = wordAlignedSize, // trace line already holds log2(size) lgSize = wordAlignedSize, // trace line already holds log2(size)
// data should be aligned to beatBytes // data should be aligned to beatBytes
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt data =
(wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
) )
val (glegal, gbits) = edge.Get( val (glegal, gbits) = edge.Get(
fromSource = sourceGen.io.id.bits, fromSource = sourceGen.io.id.bits,
@@ -1288,9 +1399,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean) class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
extends BlackBox( extends BlackBox(
Map("FILENAME" -> filename, Map(
"FILENAME" -> filename,
"NUM_LANES" -> numLanes, "NUM_LANES" -> numLanes,
"HAS_SOURCE" -> (if (traceHasSource) 1 else 0)) "HAS_SOURCE" -> (if (traceHasSource) 1 else 0)
)
) )
with HasBlackBoxResource { with HasBlackBoxResource {
val traceLineT = new TraceLine val traceLineT = new TraceLine
@@ -1304,7 +1417,8 @@ class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
// These names have to match declarations in the Verilog code, eg. // These names have to match declarations in the Verilog code, eg.
// trace_read_address. // trace_read_address.
val trace_read = new Bundle { // can't use HasTraceLine because this doesn't have source val trace_read =
new Bundle { // can't use HasTraceLine because this doesn't have source
val ready = Input(Bool()) val ready = Input(Bool())
val valid = Output(UInt(numLanes.W)) val valid = Output(UInt(numLanes.W))
// Chisel can't interface with Verilog 2D port, so flatten all lanes into // Chisel can't interface with Verilog 2D port, so flatten all lanes into
@@ -1476,15 +1590,23 @@ class MemTraceLogger(
// stats // stats
val numReqsThisCycle = val numReqsThisCycle =
laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } laneReqs.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce {
(v0, v1) => v0 + v1
}
val numRespsThisCycle = val numRespsThisCycle =
laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce { (v0, v1) => v0 + v1 } laneResps.map { l => Mux(l.valid, 1.U(64.W), 0.U(64.W)) }.reduce {
(v0, v1) => v0 + v1
}
val reqBytesThisCycle = val reqBytesThisCycle =
laneReqs.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => laneReqs
.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
.reduce { (b0, b1) =>
b0 + b1 b0 + b1
} }
val respBytesThisCycle = val respBytesThisCycle =
laneResps.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }.reduce { (b0, b1) => laneResps
.map { l => Mux(l.valid, 1.U(64.W) << l.size, 0.U(64.W)) }
.reduce { (b0, b1) =>
b0 + b1 b0 + b1
} }
numReqs := numReqs + numReqsThisCycle numReqs := numReqs + numReqsThisCycle
@@ -1496,7 +1618,10 @@ class MemTraceLogger(
// //
// This is a clunky workaround of the fact that Chisel doesn't allow partial // This is a clunky workaround of the fact that Chisel doesn't allow partial
// assignment to a bitfield range of a wide signal. // assignment to a bitfield range of a wide signal.
def flattenTrace(simIO: Bundle with HasTraceLine, perLane: Vec[TraceLine]) = { def flattenTrace(
simIO: Bundle with HasTraceLine,
perLane: Vec[TraceLine]
) = {
// these will get optimized out // these will get optimized out
val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid))) val vecValid = Wire(Vec(numLanes, chiselTypeOf(perLane(0).valid)))
val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source))) val vecSource = Wire(Vec(numLanes, chiselTypeOf(perLane(0).source)))
@@ -1592,8 +1717,14 @@ object TLPrintf {
tlData: UInt, tlData: UInt,
reqData: UInt reqData: UInt
) = { ) = {
printf(s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d", printf(
source, address, size, mask, is_store) s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d",
source,
address,
size,
mask,
is_store
)
when(is_store) { when(is_store) {
printf(", tlData=%x, reqData=%x", tlData, reqData) printf(", tlData=%x, reqData=%x", tlData, reqData)
} }
@@ -1640,7 +1771,10 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
// generate dummy traffic to coalescer to prevent it from being optimized // generate dummy traffic to coalescer to prevent it from being optimized
// out during synthesis // out during synthesis
val address = Wire(UInt(config.addressWidth.W)) val address = Wire(UInt(config.addressWidth.W))
address := Cat((finishCounter + (lane.U % 3.U)), 0.U(config.wordSizeWidth.W)) address := Cat(
(finishCounter + (lane.U % 3.U)),
0.U(config.wordSizeWidth.W)
)
val (tl, edge) = node.out(0) val (tl, edge) = node.out(0)
val (legal, bits) = edge.Put( val (legal, bits) = edge.Put(
fromSource = sourceIdCounter, fromSource = sourceIdCounter,
@@ -1657,11 +1791,13 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
tl.e.valid := false.B tl.e.valid := false.B
} }
val dataSum = outer.laneNodes.map { node => val dataSum = outer.laneNodes
.map { node =>
val tl = node.out(0)._1 val tl = node.out(0)._1
val data = Mux(tl.d.valid, tl.d.bits.data, 0.U) val data = Mux(tl.d.valid, tl.d.bits.data, 0.U)
data data
}.reduce (_ +& _) }
.reduce(_ +& _)
// this doesn't make much sense, but it prevents the entire uncoalescer from // this doesn't make much sense, but it prevents the entire uncoalescer from
// being optimized away // being optimized away
finishCounter := finishCounter + dataSum finishCounter := finishCounter + dataSum
@@ -1680,8 +1816,10 @@ class DummyCoalescer(implicit p: Parameters) extends LazyModule {
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << config.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth)
)
) )
) )
@@ -1704,7 +1842,8 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
} }
// tracedriver --> coalescer --> tracelogger --> tlram // tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends LazyModule { class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters)
extends LazyModule {
val numLanes = p(SIMTCoreKey).get.nLanes val numLanes = p(SIMTCoreKey).get.nLanes
val config = defaultConfig.copy(numLanes = numLanes) val config = defaultConfig.copy(numLanes = numLanes)
@@ -1713,14 +1852,18 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz
new MemTraceLogger(numLanes, filename, loggerName = "coreside") new MemTraceLogger(numLanes, filename, loggerName = "coreside")
) )
val coal = LazyModule(new CoalescingUnit(config)) val coal = LazyModule(new CoalescingUnit(config))
val memSideLogger = LazyModule(new MemTraceLogger(numLanes + 1, filename, loggerName = "memside")) val memSideLogger = LazyModule(
new MemTraceLogger(numLanes + 1, filename, loggerName = "memside")
)
val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge val rams = Seq.fill(numLanes + 1)( // +1 for coalesced edge
LazyModule( LazyModule(
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << config.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << config.dataBusWidth)
)
) )
) )
@@ -1751,8 +1894,9 @@ class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends Laz
} }
} }
class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit p: Parameters) class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit
extends UnitTest(timeout) { p: Parameters
) extends UnitTest(timeout) {
val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module) val dut = Module(LazyModule(new TLRAMCoalescerLogger(filename)).module)
dut.io.start := io.start dut.io.start := io.start
io.finished := dut.io.finished io.finished := dut.io.finished
@@ -1770,8 +1914,10 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
// NOTE: beatBytes here sets the data bitwidth of the upstream TileLink // NOTE: beatBytes here sets the data bitwidth of the upstream TileLink
// edges globally, by way of Diplomacy communicating the TL slave // edges globally, by way of Diplomacy communicating the TL slave
// parameters to the upstream nodes. // parameters to the upstream nodes.
new TLRAM(address = AddressSet(0x0000, 0xffffff), new TLRAM(
beatBytes = (1 << defaultConfig.dataBusWidth)) address = AddressSet(0x0000, 0xffffff),
beatBytes = (1 << defaultConfig.dataBusWidth)
)
) )
) )
@@ -1785,13 +1931,13 @@ class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
} }
} }
class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) { class TLRAMCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
extends UnitTest(timeout) {
val dut = Module(LazyModule(new TLRAMCoalescer).module) val dut = Module(LazyModule(new TLRAMCoalescer).module)
dut.io.start := io.start dut.io.start := io.start
io.finished := dut.io.finished io.finished := dut.io.finished
} }
//////////// ////////////
//////////// ////////////
//////////// ////////////
@@ -1853,26 +1999,10 @@ class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends La
val node = TLIdentityNode() val node = TLIdentityNode()
node :=* outputXbar.node node :=* outputXbar.node
val nonCoalEntryT = new ReqQueueEntry( val nonCoalEntryT = new NonCoalescedRequest(config)
log2Ceil(config.numOldSrcIds), val coalEntryT = new CoalescedRequest(config)
config.wordSizeWidth, val respNonCoalEntryT = new NonCoalescedResponse(config)
config.addressWidth, val respCoalBundleT = new CoalescedResponse(config)
config.wordSizeInBytes * 8
)
val coalEntryT = new ReqQueueEntry(
log2Ceil(config.numOldSrcIds),
log2Ceil(config.maxCoalLogSize),
config.addressWidth,
(1 << config.maxCoalLogSize) * 8
)
val respNonCoalEntryT = new RespQueueEntry(
log2Ceil(config.numOldSrcIds),
config.wordSizeWidth,
config.wordSizeInBytes * 8
)
val respCoalBundleT = new CoalescedResponseBundle(config)
lazy val module = new CoalescerXbarImpl( lazy val module = new CoalescerXbarImpl(
this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT) this, config, nonCoalEntryT, coalEntryT, respNonCoalEntryT, respCoalBundleT)
@@ -1883,10 +2013,10 @@ class CoalescerXbar(config: CoalescerConfig) (implicit p: Parameters) extends La
class CoalescerXbarImpl(outer: CoalescerXbar, class CoalescerXbarImpl(outer: CoalescerXbar,
config: CoalescerConfig, config: CoalescerConfig,
nonCoalEntryT: ReqQueueEntry, nonCoalEntryT: Request,
coalEntryT: ReqQueueEntry, coalEntryT: Request,
respNonCoalEntryT: RespQueueEntry, respNonCoalEntryT: Response,
respCoalBundleT: CoalescedResponseBundle respCoalBundleT: CoalescedResponse
) extends LazyModuleImp(outer){ ) extends LazyModuleImp(outer){
@@ -1957,11 +2087,3 @@ class CoalescerXbarImpl(outer: CoalescerXbar,
} }

View File

@@ -180,12 +180,12 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
) )
val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) val reqQueueEnqReady = peekIn(0).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(x.cloneType)) val reqQueueEnqBits = peekIn(1).asInstanceOf[Seq[Request]].map(x => IO(x.cloneType))
val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType)) val reqQueueEnqValid = peekIn(2).asInstanceOf[Seq[Bool]].map(x => IO(x.cloneType))
val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[ReqQueueEntry]].map(x => IO(Output(x.cloneType))) val reqQueueDeqBits = peekIn(3).asInstanceOf[Seq[Request]].map(x => IO(Output(x.cloneType)))
val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType))) val reqQueueDeqValid = peekIn(4).asInstanceOf[Seq[Bool]].map(x => IO(Output(x.cloneType)))
val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType)) val coalReqReady = IO(Output(peekIn(5).asInstanceOf[Bool].cloneType))
val coalReqBits = IO(Output(peekIn(6).asInstanceOf[ReqQueueEntry].cloneType)) val coalReqBits = IO(Output(peekIn(6).asInstanceOf[Request].cloneType))
val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType)) val coalReqValid = IO(Output(peekIn(7).asInstanceOf[Bool].cloneType))
val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType)) val coalInvalidate = IO(Output(peekIn(8).asInstanceOf[Valid[Vec[UInt]]].cloneType))
@@ -759,14 +759,15 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}*/ }*/
} }
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "uncoalescer"
object uncoalescerTestConfig extends CoalescerConfig( object uncoalescerTestConfig extends CoalescerConfig(
enable = true, enable = true,
numLanes = 4, numLanes = 4,
queueDepth = 2, queueDepth = 2,
waitTimeout = 8, waitTimeout = 8,
addressWidth = 24, addressWidth = 24,
dataBusWidth = 5, dataBusWidth = 4, // 128 bit data bus
// watermark = 2,
wordSizeInBytes = 4, wordSizeInBytes = 4,
numOldSrcIds = 16, numOldSrcIds = 16,
numNewSrcIds = 4, numNewSrcIds = 4,
@@ -778,48 +779,50 @@ object uncoalescerTestConfig extends CoalescerConfig(
bankStrideInBytes = 64, bankStrideInBytes = 64,
) )
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { val config = uncoalescerTestConfig
behavior of "uncoalescer"
val numLanes = 4 val nonCoalReqT = new NonCoalescedRequest(config)
val numPerLaneReqs = 2 val coalReqT = new CoalescedRequest(config)
val sourceWidth = 2
val sizeWidth = 2
// 16B coalescing size
val coalDataWidth = 128
val numInflightCoalRequests = 4
it should "work in general case" in { it should "work in general case" in {
test(new Uncoalescer(uncoalescerTestConfig)) test(new Uncoalescer(config, nonCoalReqT, coalReqT))
// vcs helps with simulation time, but sometimes errors with // vcs helps with simulation time, but sometimes errors with
// "mutation occurred during iteration" java error // "mutation occurred during iteration" java error
// .withAnnotations(Seq(VcsBackendAnnotation)) // .withAnnotations(Seq(VcsBackendAnnotation))
{ c => { c =>
// 4 lanes, queue depth 2
c.io.windowElts(0)(0).op.poke(0.U)
c.io.windowElts(0)(0).source.poke(1.U)
c.io.windowElts(0)(0).address.poke(0x4.U)
c.io.windowElts(0)(0).size.poke(2.U)
c.io.windowElts(0)(1).op.poke(0.U)
c.io.windowElts(0)(1).source.poke(2.U)
c.io.windowElts(0)(1).address.poke(0x4.U) // two reqs from one lane
c.io.windowElts(0)(1).size.poke(2.U)
c.io.windowElts(2)(0).op.poke(0.U)
c.io.windowElts(2)(0).source.poke(2.U)
c.io.windowElts(2)(0).address.poke(0x8.U)
c.io.windowElts(2)(0).size.poke(2.U)
c.io.windowElts(2)(1).op.poke(0.U)
c.io.windowElts(2)(1).source.poke(2.U)
c.io.windowElts(2)(1).address.poke(0xc.U)
c.io.windowElts(2)(1).size.poke(2.U)
// indicate lane 0 and 2 are used for coalescing
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits(0).poke(0x3.U) // 2'b11 for depth=2
c.io.invalidate.bits(1).poke(0x0.U)
c.io.invalidate.bits(2).poke(0x3.U)
c.io.invalidate.bits(3).poke(0x0.U)
val sourceId = 0.U val sourceId = 0.U
val four = c.io.newEntry.sizeEnumT.FOUR c.io.coalReq.valid.poke(true.B)
c.io.coalReqValid.poke(true.B) c.io.coalReq.bits.source.poke(sourceId)
c.io.newEntry.source.poke(sourceId) c.io.coalReq.ready.expect(true.B)
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(0).source.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes
c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four)
c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B)
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(0).source.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(1).source.poke(2.U)
c.io.newEntry.lanes(2).reqs(1).offset.poke(3.U)
c.io.newEntry.lanes(2).reqs(1).sizeEnum.poke(four)
c.io.newEntry.lanes(3).reqs(0).valid.poke(false.B)
c.clock.step() c.clock.step()
c.io.coalReqValid.poke(false.B) c.io.coalReq.valid.poke(false.B)
c.io.invalidate.valid.poke(false.B)
c.clock.step() c.clock.step()
@@ -848,37 +851,42 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
} }
it should "uncoalesce when coalesced to the same word offset" in { it should "uncoalesce when coalesced to the same word offset" in {
test(new Uncoalescer(uncoalescerTestConfig)) test(new Uncoalescer(config, nonCoalReqT, coalReqT))
// .withAnnotations(Seq(VcsBackendAnnotation)) // .withAnnotations(Seq(VcsBackendAnnotation))
{ c => { c =>
// 4 lanes, queue depth 2
c.io.windowElts(0)(0).op.poke(0.U)
c.io.windowElts(0)(0).source.poke(0.U)
c.io.windowElts(0)(0).address.poke(0x4.U)
c.io.windowElts(0)(0).size.poke(2.U)
c.io.windowElts(1)(0).op.poke(0.U)
c.io.windowElts(1)(0).source.poke(1.U)
c.io.windowElts(1)(0).address.poke(0x4.U) // same address as lane 0, but from a different lane
c.io.windowElts(1)(0).size.poke(2.U)
c.io.windowElts(2)(0).op.poke(0.U)
c.io.windowElts(2)(0).source.poke(2.U)
c.io.windowElts(2)(0).address.poke(0x4.U)
c.io.windowElts(2)(0).size.poke(2.U)
c.io.windowElts(3)(0).op.poke(0.U)
c.io.windowElts(3)(0).source.poke(3.U)
c.io.windowElts(3)(0).address.poke(0x4.U)
c.io.windowElts(3)(0).size.poke(2.U)
// indicate lanes used for coalescing
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits(0).poke(0x1.U) // 2'b01 for enabling head
c.io.invalidate.bits(1).poke(0x1.U)
c.io.invalidate.bits(2).poke(0x1.U)
c.io.invalidate.bits(3).poke(0x1.U)
val sourceId = 0.U val sourceId = 0.U
val four = c.io.newEntry.sizeEnumT.FOUR c.io.coalReq.valid.poke(true.B)
c.io.coalReqValid.poke(true.B) c.io.coalReq.bits.source.poke(sourceId)
c.io.newEntry.source.poke(sourceId) c.io.coalReq.ready.expect(true.B)
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(0).source.poke(0.U)
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B)
c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(1).reqs(0).source.poke(1.U)
c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B)
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(0).source.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B)
c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(3).reqs(0).source.poke(3.U)
c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B)
c.clock.step() c.clock.step()
c.io.coalReqValid.poke(false.B) c.io.coalReq.valid.poke(false.B)
c.io.invalidate.valid.poke(false.B)
c.clock.step() c.clock.step()
@@ -908,138 +916,3 @@ class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
} }
} }
} }
// Spec for the "inflight coalesced request table".
//
// As written, this class registers no test cases: every `it should ...` block
// below is commented out. The only live code is a set of sizing constants and
// the construction of one InflightCoalReqTableEntry type from them.
//
// NOTE(review): the disabled tests build `InflightCoalReqTable(numLanes,
// sourceWidth, entries)` and poke `fromLane` / `respSourceId` / `reqSourceIds`;
// this looks like an older table interface -- confirm against the current
// InflightCoalReqTable before re-enabling any of them.
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "inflight coalesced request table"
// Sizing constants. numLanes, numPerLaneReqs, sourceWidth and offsetBits are
// passed, in that order, to the InflightCoalReqTableEntry constructed below.
val numLanes = 4
val numPerLaneReqs = 2
val sourceWidth = 2
// `entries` is referenced only by the commented-out tests.
val entries = 4
val offsetBits = 4
// `sizeBits` is not referenced anywhere in this class.
val sizeBits = 2
// Entry type built from the constants above; no live code in this class uses
// it. `testConfig` is defined outside this class.
val inflightCoalReqTableEntry =
new InflightCoalReqTableEntry(
numLanes,
numPerLaneReqs,
sourceWidth,
offsetBits,
testConfig.sizeEnum
)
// Disabled test: fill the table, check enqueue is refused, then drain it by
// looking up every source ID.
// it should "stop enqueueing when full" in {
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c =>
// // fill up the table
// for (i <- 0 until entries) {
// val sourceId = i
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(sourceId.U)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.io.lookup.ready.poke(false.B)
// c.clock.step()
// }
// // now cannot enqueue any more
// c.io.enq.ready.expect(false.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(0.U)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.clock.step()
// c.io.enq.ready.expect(false.B)
// // try to lookup all existing entries
// for (i <- 0 until entries) {
// val sourceId = i
// c.io.enq.valid.poke(false.B)
// c.io.lookup.ready.poke(true.B)
// c.io.lookupSourceId.poke(sourceId)
// c.io.lookup.valid.expect(true.B)
// c.io.lookup.bits.expect(sourceId)
// c.clock.step()
// }
// // now the table should be empty
// for (i <- 0 until entries) {
// val sourceId = i
// c.io.enq.valid.poke(false.B)
// c.io.lookup.ready.poke(true.B)
// c.io.lookupSourceId.poke(sourceId)
// c.io.lookup.valid.expect(false.B)
// c.clock.step()
// }
// }
// }
// Disabled test: enqueue two entries, look up the second by source ID, and
// check that it is gone on the following lookup.
// it should "lookup matching entry" in {
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries))
// .withAnnotations(Seq(WriteVcdAnnotation)) { c =>
// c.reset.poke(true.B)
// c.clock.step(10)
// c.reset.poke(false.B)
// // enqueue one entry to not match at 0th index
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(0.U)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.clock.step()
// val targetSourceId = 1.U
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(targetSourceId)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.clock.step()
// c.io.lookup.ready.poke(true.B)
// c.io.lookupSourceId.poke(targetSourceId)
// c.io.lookup.valid.expect(true.B)
// c.io.lookup.bits.expect(targetSourceId)
// c.clock.step()
// // test if matching entry dequeues after 1 cycle
// c.io.lookup.ready.poke(true.B)
// c.io.lookupSourceId.poke(targetSourceId)
// c.io.lookup.valid.expect(false.B)
// }
// }
// Disabled test: drive an enqueue and a lookup (of a different source ID) in
// the same cycle. It only steps the clock; it contains no expect on the
// result.
// it should "handle lookup and enqueue at the same time" in {
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c =>
// // fill up the table
// val targetSourceId = 1.U
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(0.U)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.clock.step()
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(targetSourceId)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.clock.step()
// // do both enqueue and lookup at the same cycle
// val enqSourceId = 2.U
// c.io.enq.ready.expect(true.B)
// c.io.enq.valid.poke(true.B)
// c.io.enq.bits.fromLane.poke(0.U)
// c.io.enq.bits.respSourceId.poke(enqSourceId)
// c.io.enq.bits.reqSourceIds.foreach { id => id.poke(0.U) }
// c.io.lookup.ready.poke(true.B)
// c.io.lookupSourceId.poke(targetSourceId)
// c.clock.step()
// }
// }
}