Merge branch 'graphics' of https://github.com/hansungk/rocket-chip into graphics

This commit is contained in:
Richard Yan
2023-04-28 20:51:08 -07:00
2 changed files with 474 additions and 224 deletions

View File

@@ -36,29 +36,29 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
}
case class CoalescerConfig(
numLanes: Int, // number of lanes (or threads) in a warp
maxSize: Int, // maximum burst size (64 bytes)
queueDepth: Int, // request window per lane
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
addressWidth: Int, // assume <= 32
dataBusWidth: Int, // memory-side downstream TileLink data bus size
// this has to be at least larger than the word size for
// the coalescer to perform well
// watermark = 2, // minimum buffer occupancy to start coalescing
wordSizeInBytes: Int, // 32-bit system
wordWidth: Int, // log(WORD_SIZE)
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
numNewSrcIds: Int, // num of outstanding coalesced requests
respQueueDepth: Int, // depth of the response fifo queues
coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
// must be power of 2's
sizeEnum: InFlightTableSizeEnum
)
numLanes: Int, // number of lanes (or threads) in a warp
queueDepth: Int, // request window per lane
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
addressWidth: Int, // assume <= 32
dataBusWidth: Int, // memory-side downstream TileLink data bus size
// this has to be at least larger than the word size for
// the coalescer to perform well
// watermark = 2, // minimum buffer occupancy to start coalescing
wordSizeInBytes: Int, // 32-bit system
wordWidth: Int, // log(WORD_SIZE)
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
numNewSrcIds: Int, // num of outstanding coalesced requests
respQueueDepth: Int, // depth of the response fifo queues
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
// each size is log(byteSize)
sizeEnum: InFlightTableSizeEnum,
) {
// maximum coalesced size
def maxCoalLogSize: Int = coalLogSizes.max
}
object defaultConfig extends CoalescerConfig(
numLanes = 4,
// TODO: bigger size
maxSize = 3,
queueDepth = 1,
waitTimeout = 8,
addressWidth = 24,
@@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig(
numOldSrcIds = 16,
numNewSrcIds = 4,
respQueueDepth = 4,
coalSizes = Seq(3),
coalLogSizes = Seq(3),
sizeEnum = DefaultInFlightTableSizeEnum
)
@@ -153,10 +153,14 @@ class ReqSourceGen(sourceWidth: Int) extends Module {
// A shift-register queue implementation that supports invalidating entries
// and exposing queue contents as output IO. (TODO: support deadline)
// Initially copied from freechips.rocketchip.util.ShiftQueue.
// If `pipe` is true, support enqueueing to a full queue when also dequeueing.
// The queue only shifts down when `allowShift` is given true. Dequeueing
// works normally, but if allowShift was false, the queue head will stay
// invalid after dequeueing. This option is added in order to synchronize the
// shifting of the queues between lanes to model the SIMD behavior.
// If `pipe` is true, support enqueueing to a full queue when head is being
// dequeued at the next cycle.
// Software model: window.py
class CoalShiftQueue[T <: Data](
gen: T,
class CoalShiftQueue[T <: Data]( gen: T,
val entries: Int,
pipe: Boolean = true,
flow: Boolean = false
@@ -164,6 +168,7 @@ class CoalShiftQueue[T <: Data](
val io = IO(new Bundle {
val queue = new QueueIO(gen, entries)
val invalidate = Input(Valid(UInt(entries.W)))
val allowShift = Input(Bool())
val mask = Output(UInt(entries.W))
val elts = Output(Vec(entries, gen))
// 'QueueIO' provides io.count, but we might not want to use it in the
@@ -192,7 +197,7 @@ class CoalShiftQueue[T <: Data](
def paddedUsed = pad({ i: Int => used(i) })
def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i))
val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0))
val shift = io.allowShift && (used =/= 0.U) && (io.queue.deq.fire || !validAfterInv(0))
for (i <- 0 until entries) {
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
val wen = Mux(
@@ -208,27 +213,28 @@ class CoalShiftQueue[T <: Data](
(io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
(io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i)
)
// additionally, head entry should get invalidated when dequeue fired
// but queue didn't shift (e.g. because allowShift was false)
when (io.queue.deq.fire && !shift) {
valid(0) := false.B
}
}
when(io.queue.enq.fire) {
when(!io.queue.deq.fire) {
when(!shift) {
used := (used << 1.U) | 1.U
}
}.elsewhen(io.queue.deq.fire) {
}.elsewhen(shift) {
used := used >> 1.U
}
io.queue.enq.ready := !valid(entries - 1)
// We don't want to invalidate deq.valid response right away even when
// io.invalidate(head) is true.
// Coalescing unit consumes queue head's validity, and produces its new
// validity. Deasserting deq.valid right away will result in a combinational
// cycle.
io.queue.deq.valid := valid(0)
io.queue.deq.valid := validAfterInv(0)
io.queue.deq.bits := elts.head
assert(!flow, "flow-through is not implemented")
if (flow) {
// FIXME old code
when(io.queue.enq.valid) { io.queue.deq.valid := true.B }
when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits }
}
@@ -243,7 +249,7 @@ class CoalShiftQueue[T <: Data](
}
// Software model: coalescer.py
class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
config: CoalescerConfig) extends Module {
val io = IO(new Bundle {
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
@@ -251,8 +257,10 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
val baseAddr = Output(UInt(config.addressWidth.W))
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W))
val coverageHits = Output(UInt((1 << config.maxSize).W))
// number of entries matched with this leader lane's head.
// maximum is numLanes * queueDepth
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W))
val coverageHits = Output(UInt((1 << config.maxCoalLogSize).W))
})
})
@@ -277,14 +285,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
leadersValid(i), head.source, head.address)
}
}
// debug assertions and prints
when (leadersValid.reduce(_ || _)) {
assert(testNoQueueDrift, "unexpected drift between lane request queues")
printQueueHeads
// printQueueHeads
}
val size = coalSize
val size = coalLogSize
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
(req0.op === req1.op) &&
@@ -294,18 +300,24 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
// Gives a 2-D table of Bools representing match at every queue entry,
// for each lane (so 3-D in total).
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
// TODO: match leader to only lanes >= leader idx
io.window.map { followerLane =>
// compare leader's head against follower's every queue entry
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
canMatch(follower, followerValid, leader, leaderValid)
val matchTablePerLane = (leaders zip leadersValid).zipWithIndex
.map { case ((leader, leaderValid), leaderIndex) =>
io.window.zipWithIndex.map { case (followerQueue, followerIndex) =>
// compare leader's head against follower's every queue entry
(followerQueue.elts zip followerQueue.mask.asBools)
.map { case (follower, followerValid) =>
// match leader to only followers at lanes >= leader idx
// this halves the number of comparators
if (followerIndex < leaderIndex) false.B
else canMatch(follower, followerValid, leader, leaderValid)
}
}
}
}
// TODO: potentially expensive: popcount & adder
val matchCounts = matchTablePerLane.map(leader => leader.map(PopCount(_)).reduce(_ +& _))
val matchCounts = matchTablePerLane.map(table =>
table.map(PopCount(_)) // sum up each column
.reduce(_ +& _))
val canCoalesce = matchCounts.map(_ > 1.U)
// TODO: potentially expensive
@@ -323,6 +335,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
})(chosenLeaderIdx)
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
// coverage calculation
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
// 2-D table flattened to 1-D
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
val valids = io.window.map(_.mask).flatMap(_.asBools)
// indicates whether each word in the coalesced chunk is accessed by any of the
// queue entries. e.g. if [ 1 1 1 1 ], all four words in the coalesced
// data have been accessed and we've reached 100% utilization.
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
}
// debug prints
when (leadersValid.reduce(_ || _)) {
matchCounts.zipWithIndex.foreach { case (count, i) =>
@@ -334,14 +358,13 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
printf("%d ", m)
}
printf("]\n")
}
printf("chosenMatchCount = %d\n", chosenMatchCount)
// coverage calculation
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
val valids = io.window.map(_.mask).flatMap(_.asBools)
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
printf("hits = [ ")
hits.foreach { m =>
printf("%d ", m)
}
printf("]\n")
}
io.results.leaderIdx := chosenLeaderIdx
@@ -354,19 +377,21 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
// Software model: coalescer.py
class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry,
config: CoalescerConfig) extends Module {
val io = IO(new Bundle {
// coalescing window, connected to the contents of the request queues
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
val outReq = DecoupledIO(coalReqT.cloneType)
// generated coalesced request
val coalReq = DecoupledIO(coalReqT.cloneType)
// invalidate signals going into each request queue's head
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
})
val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
coalescers.foreach(_.io.window := io.window)
def normalize(x: Seq[UInt]): Seq[UInt] = {
x.zip(config.coalSizes).map { case (hits, size) =>
(hits << (config.maxSize - size).U).asUInt
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
(valPerSize zip config.coalLogSizes).map { case (hits, size) =>
(hits << (config.maxCoalLogSize - size).U).asUInt
}
}
@@ -378,27 +403,40 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
}._2
}
// normalize to maximum coalescing size so that we can do fair comparisons
// between coalescing results of different sizes
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W))
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
val chosenValid = Wire(Bool())
// minimum 25% coverage
val minCoverage = 1.max(1 << (config.maxSize - 4))
val minCoverage = 1.max(1 << ((config.maxCoalLogSize - 2) - 2))
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
chosenIdx := argMax(normalizedHits)
chosenSizeIdx := argMax(normalizedHits)
chosenValid := true.B
printf("coalescing success by coverage policy\n")
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
chosenIdx := argMax(normalizedMatches)
chosenSizeIdx := argMax(normalizedMatches)
chosenValid := true.B
printf("coalescing success by matches policy\n")
}.otherwise {
chosenIdx := DontCare
chosenSizeIdx := DontCare
chosenValid := false.B
}
def debugPolicyPrint() = {
printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount)
printf("normalizedMatches[0]=%d\n", normalizedMatches(0))
printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits)
printf("normalizedHits[0]=%d\n", normalizedHits(0))
printf("minCoverage=%d\n", minCoverage.U)
}
// create coalesced request
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx)
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx)
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx)
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
// flatten requests and matches
val flatReqs = io.window.flatMap(_.elts)
@@ -411,8 +449,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
val numWords = (1.U << (chosenSize - config.wordWidth.U)).asUInt
val maxWords = 1 << (config.maxSize - config.wordWidth)
val addrMask = Wire(UInt(config.maxSize.W))
val maxWords = 1 << (config.maxCoalLogSize - config.wordWidth)
val addrMask = Wire(UInt(config.maxCoalLogSize.W))
addrMask := (1.U << chosenSize).asUInt - 1.U
val data = Wire(Vec(maxWords, UInt((config.wordSizeInBytes * 8).W)))
@@ -420,7 +458,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
for (i <- 0 until maxWords) {
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
m && ((req.address(config.maxSize - 1, 0) & addrMask) === i.U)
m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U)
}
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
@@ -435,18 +473,20 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
}
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
io.outReq.bits.source := sourceGen.io.id.bits
io.outReq.bits.mask := mask.asUInt
io.outReq.bits.data := data.asUInt
io.outReq.bits.size := chosenSize
io.outReq.bits.address := chosenBundle.baseAddr
io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
io.outReq.valid := chosenValid && sourceGen.io.id.valid
val coalesceValid = chosenValid && sourceGen.io.id.valid
io.coalReq.bits.source := sourceGen.io.id.bits
io.coalReq.bits.mask := mask.asUInt
io.coalReq.bits.data := data.asUInt
io.coalReq.bits.size := chosenSize
io.coalReq.bits.address := chosenBundle.baseAddr
io.coalReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
io.coalReq.valid := coalesceValid
io.invalidate.bits := chosenBundle.matchOH
io.invalidate.valid := io.outReq.fire // invalidate only when fire
io.invalidate.valid := io.coalReq.fire // invalidate only when fire
dontTouch(io.invalidate) // debug
@@ -471,7 +511,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth))
}
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.addressWidth, config.maxSize)
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.addressWidth, config.maxCoalLogSize)
val coalescer = Module(new MultiCoalescer(reqQueues.head, coalReqT, config))
coalescer.io.window := reqQueues.map(_.io)
@@ -511,20 +551,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready")
reqQueue.io.queue.enq.valid := tlIn.a.valid
reqQueue.io.queue.enq.bits := req
// TODO: deq.ready should respect downstream ready
// TODO: deq.ready should respect downstream arbiter
reqQueue.io.queue.deq.ready := true.B
// invalidate queue entries that contain original core requests that got
// coalesced into a wider one
reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
reqQueue.io.allowShift := true.B
// NOTE: this relies on CoalShiftQueue combinationally deasserting
// deq.valid in the same cycle that the invalidate signal for the
// queue head is asserted.
tlOut.a.valid := reqQueue.io.queue.deq.valid
tlOut.a.bits := reqQueue.io.queue.deq.bits.toTLA(edgeOut)
}
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
tlCoal.a.valid := coalescer.io.outReq.valid
tlCoal.a.bits := coalescer.io.outReq.bits.toTLA(edgeCoal)
coalescer.io.outReq.ready := tlCoal.a.ready
tlCoal.a.valid := coalescer.io.coalReq.valid
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
coalescer.io.coalReq.ready := tlCoal.a.ready
tlCoal.b.ready := true.B
tlCoal.c.valid := false.B
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
@@ -541,7 +587,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
val numPerLaneReqs = config.queueDepth
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.maxSize)
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.maxCoalLogSize)
val respQueues = Seq.tabulate(config.numLanes) { _ =>
Module(
new MultiPortQueue(
@@ -550,6 +596,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// requests that didn't get coalesced, and M is the maximum number of
// single-lane requests that can go into a coalesced request.
// (`numPerLaneReqs`).
// TODO: potentially expensive, because this generates more FFs.
// Rather than enqueueing all responses in a single cycle, consider
// enqueueing one by one (at the cost of possibly stalling downstream).
1 + numPerLaneReqs,
// deq_lanes = 1 because we're serializing all responses to 1 port that
// goes back to the core.
@@ -566,7 +615,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
)
}
val respQueueNoncoalPort = 0
val respQueueCoalPortOffset = 1
val respQueueUncoalPortOffset = 1
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
@@ -645,51 +694,40 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
// this also reduces top level clutter.
val offsetBits = 4 // FIXME hardcoded
// but the width of the size enum
val newEntry = Wire(
new InflightCoalReqTableEntry(
config.numLanes,
numPerLaneReqs,
sourceWidth,
offsetBits,
config.sizeEnum
)
)
println(s"=========== table sourceWidth: ${sourceWidth}")
// println(s"=========== table sizeEnumBits: ${newEntry.sizeEnumBits}")
newEntry.source := coalescer.io.outReq.bits.source
val uncoalescer = Module(new Uncoalescer(config))
val newEntry = Wire(uncoalescer.inflightTable.entryT)
newEntry.source := coalescer.io.coalReq.bits.source
// TODO: richard to write table fill logic
// FIXME: this assertion used to say 1 << config.MAX_SIZE
// I changed this to say DATA BUS SIZE. We need another assertion
// to assert that MAX_SIZE is <= DATA_BUS_SIZE because we do not support
// multi-beat writes currently
assert(
assert (config.maxCoalLogSize <= config.dataBusWidth,
"multi-beat coalesced reads/writes are currently not supported")
assert (
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant"
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
+ s" (${(1 << config.dataBusWidth) * 8})"
)
val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
newEntry.lanes.foreach { l =>
l.reqs.zipWithIndex.foreach { case (r, i) =>
// TODO: this part needs the actual coalescing logic to work
r.valid := false.B
r.source := origReqs(i).source
r.offset := (origReqs(i).address % (1 << config.maxSize).U) >> config.wordWidth
r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size)
val reqQueueHeads = reqQueues.map(q => q.io.queue.deq.bits)
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
// coalescer to every (numLanes * queueDepth) entry in the inflight table.
(newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex
.foreach { case ((laneEntry, laneInv), lane) =>
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
.foreach { case ((reqEntry, inv), i) =>
val req = reqQueues(lane).io.elts(i)
when ((coalescer.io.invalidate.valid && inv)) {
printf(s"coalescer: reqQueue(${lane})(${i}) got invalidated (source=%d)\n", req.source)
}
reqEntry.valid := (coalescer.io.invalidate.valid && inv)
reqEntry.source := req.source
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordWidth)
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
// TODO: load/store op
}
}
}
newEntry.lanes(0).reqs(0).valid := true.B
newEntry.lanes(1).reqs(0).valid := true.B
newEntry.lanes(2).reqs(0).valid := true.B
newEntry.lanes(3).reqs(0).valid := true.B
dontTouch(newEntry)
// Uncoalescer module uncoalesces responses back to each lane
val uncoalescer = Module(new UncoalescingUnit(config))
uncoalescer.io.coalReqValid := coalescer.io.outReq.valid
uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
uncoalescer.io.newEntry := newEntry
// Cleanup: custom <>?
uncoalescer.io.coalResp.valid := tlCoal.d.valid
@@ -698,22 +736,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
tlCoal.d.ready := uncoalescer.io.coalResp.ready
// Queue up synthesized uncoalesced responses into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
lanes.zipWithIndex.foreach { case (resp, i) =>
(respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) =>
perLaneResps.zipWithIndex.foreach { case (resp, i) =>
// TODO: rather than crashing, deassert tlOut.d.ready to stall the downstream
// cache. This should ideally not happen though.
assert(
q.io.enq(respQueueCoalPortOffset + i).ready,
s"respQueue: enq port for 0-th coalesced response is blocked"
q.io.enq(respQueueUncoalPortOffset + i).ready,
s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}"
)
q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
// debug
// when (resp.valid) {
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
// }
// dontTouch(q.io.enq(respQueueCoalPortOffset))
}
}
// Debug
dontTouch(coalescer.io.outReq)
dontTouch(coalescer.io.coalReq)
val coalRespData = tlCoal.d.bits.data
dontTouch(coalRespData)
@@ -730,10 +772,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
// FIXME: overlaps with RespQueueEntry. Trait-ify
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
val source = UInt(log2Ceil(config.numNewSrcIds).W)
val data = UInt((8 * (1 << config.maxSize)).W)
val data = UInt((8 * (1 << config.maxCoalLogSize)).W)
}
class UncoalescingUnit(config: CoalescerConfig) extends Module {
class Uncoalescer(config: CoalescerConfig) extends Module {
// notes to hansung:
// val numLanes: Int, <-> config.NUM_LANES
// val numPerLaneReqs: Int, <-> config.DEPTH
@@ -833,19 +875,21 @@ class UncoalescingUnit(config: CoalescerConfig) extends Module {
// split the coalesced response back to individual per-lane responses with the
// right metadata.
class InflightCoalReqTable(config: CoalescerConfig) extends Module {
val offsetBits = 4 // FIXME hardcoded
val sizeBits = 2 // FIXME hardcoded
val offsetBits = config.maxCoalLogSize - config.wordWidth // assumes word offset
val entryT = new InflightCoalReqTableEntry(
config.numLanes,
config.queueDepth,
log2Ceil(config.numOldSrcIds),
config.maxSize,
config.maxCoalLogSize,
config.sizeEnum
)
val entries = config.numNewSrcIds
val sourceWidth = log2Ceil(config.numOldSrcIds)
println(s"=========== table sourceWidth: ${sourceWidth}")
println(s"=========== table sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
val io = IO(new Bundle {
val enq = Flipped(Decoupled(entryT))
// TODO: return actual stuff

View File

@@ -35,26 +35,46 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
name = "processor-nodes",
sourceId = IdRange(0, testConfig.numOldSrcIds),
// requestFifo = true,
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
TLClientNode(
Seq(
TLMasterPortParameters.v1(
Seq(
TLClientParameters(
name = "processor-nodes",
sourceId = IdRange(0, testConfig.numOldSrcIds),
visibility = Seq(AddressSet(0x0, 0xffffff))
)
)
)
)
) // 24 bit address space (TODO probably use testConfig)
}
val device = new SimpleDevice("dummy", Seq("dummy"))
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
val l2Nodes = Seq.tabulate(5) { _ =>
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
resources = device.reg,
regionType = RegionType.UNCACHED,
executable = true,
supportsGet = TransferSizes(1, beatBytes),
supportsPutFull = TransferSizes(1, beatBytes),
supportsPutPartial = TransferSizes(1, beatBytes),
supportsHint = TransferSizes(1, beatBytes),
fifoId = Some(0))), beatBytes)))
TLManagerNode(
Seq(
TLSlavePortParameters.v1(
Seq(
TLManagerParameters(
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
resources = device.reg,
regionType = RegionType.UNCACHED,
executable = true,
supportsArithmetic = TransferSizes(1, beatBytes),
supportsLogical = TransferSizes(1, beatBytes),
supportsGet = TransferSizes(1, beatBytes),
supportsPutFull = TransferSizes(1, beatBytes),
supportsPutPartial = TransferSizes(1, beatBytes),
supportsHint = TransferSizes(1, beatBytes),
fifoId = Some(0)
)
),
beatBytes
)
)
)
}
val dut = LazyModule(new CoalescingUnit(testConfig))
@@ -81,84 +101,116 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
// val coalMasterNode = coal.coalescerNode.makeIOs()
}
// Coalescer configuration shared by the testbench and the unit tests in this
// file (e.g. `testConfig.numLanes` sizes the client nodes and
// `1 << testConfig.dataBusWidth` gives the manager-side beat size in bytes).
// `dataBusWidth` and the entries of `coalLogSizes` are log2 of a byte count.
object testConfig extends CoalescerConfig(
numLanes = 4, // lanes (threads) per warp
queueDepth = 1, // request window per lane
waitTimeout = 8, // max cycles to wait before forced fifo dequeue, per lane
addressWidth = 24, // matches the 24-bit AddressSet(0x0, 0xffffff) used by the testbench
dataBusWidth = 5, // 1 << 5 = 32 bytes, i.e. a 256-bit data bus
// watermark = 2,
wordSizeInBytes = 4,
wordWidth = 2, // log2(wordSizeInBytes)
numOldSrcIds = 16, // outstanding requests per lane, from processor
numNewSrcIds = 4, // outstanding coalesced requests
respQueueDepth = 4, // depth of the response fifo queues
coalLogSizes = Seq(3), // a single coalescing size of 1 << 3 = 8 bytes
sizeEnum = DefaultInFlightTableSizeEnum
)
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "multi- and mono-coalescers"
it should "coalesce fully consecutive accesses at size 4, only once" in {
implicit val p: Parameters = Parameters.empty
implicit val p: Parameters = Parameters.empty
val tb = LazyModule(new DummyCoalescingUnitTB())
// val outer = LazyModule(new CoalescingUnit(testConfig))
val coal = tb.dut
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c =>
val nodes = c.coalIOs.map(_.head)
// val nodes = c.cpuNodesImp.map(_.out.head._1)
// val nodes = c.coal.node.in.map(_._1)
// val nodes = c.mitmNodesImp.map(_.in.head._1)
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
val node = nodes(idx)
def pokeA(
nodes: Seq[TLBundle],
idx: Int,
op: Int,
size: Int,
source: Int,
addr: Int,
mask: Int,
data: Int
): Unit = {
val node = nodes(idx)
// node.a.ready.expect(true.B) // FIXME: this fails currently
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
node.a.bits.param.poke(0.U)
node.a.bits.size.poke(size.U)
node.a.bits.source.poke(source.U)
node.a.bits.address.poke(addr.U)
node.a.bits.mask.poke(mask.U)
node.a.bits.data.poke(data.U)
node.a.bits.corrupt.poke(false.B)
node.a.valid.poke(true.B)
}
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
node.a.bits.param.poke(0.U)
node.a.bits.size.poke(size.U)
node.a.bits.source.poke(source.U)
node.a.bits.address.poke(addr.U)
node.a.bits.mask.poke(mask.U)
node.a.bits.data.poke(data.U)
node.a.bits.corrupt.poke(false.B)
node.a.valid.poke(true.B)
}
def unsetA(): Unit = {
nodes.foreach { node =>
node.a.valid.poke(false.B)
}
}
def unsetA(nodes: Seq[TLBundle]): Unit = {
nodes.foreach { node =>
node.a.valid.poke(false.B)
}
}
// always ready to take coalesced requests
// c.coalMasterNode.head.a.ready.poke(true.B)
// c.coal.module.coalescer.io.outReq.ready.poke(true.B)
// it should "coalesce fully consecutive accesses at size 4, only once" in {
// test(makeTb().module)
// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
// { c =>
// println(s"coalIO length = ${c.coalIOs(0).length}")
// val nodes = c.coalIOs.map(_.head)
// // val nodes = c.cpuNodesImp.map(_.out.head._1)
// // val nodes = c.coal.node.in.map(_._1)
// // val nodes = c.mitmNodesImp.map(_.in.head._1)
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222)
pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333)
pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444)
// // always ready to take coalesced requests
// // c.coalMasterNode.head.a.ready.poke(true.B)
// // c.coal.module.coalescer.io.outReq.ready.poke(true.B)
// pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
// pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
// pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
// pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
// c.clock.step()
// unsetA(nodes)
// c.clock.step()
// c.clock.step()
// }
// }
it should "coalesce identical addresses (stride of 0)" in {
test(LazyModule(new DummyCoalescingUnitTB()).module)
.withAnnotations(Seq(VcsBackendAnnotation))
{ c =>
println(s"coalIO length = ${c.coalIOs(0).length}")
val nodes = c.coalIOs.map(_.head)
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x1111)
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x2222)
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x4444)
c.clock.step()
unsetA()
unsetA(nodes)
c.clock.step()
c.clock.step()
}
}
it should "coalesce strided accesses at size 6" in {
it should "coalesce strided accesses at size 6" in {}
}
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {}
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {
it should "not touch uncoalescable requests" in {}
}
it should "allow temporal coalescing when depth >=2" in {}
it should "not touch uncoalescable requests" in {
it should "select the most coverage mono-coalescer" in {}
}
it should "allow temporal coalescing when depth >=2" in {
}
it should "select the most coverage mono-coalescer" in {
}
it should "resort to the backup policy when coverage is below average" in {
}
it should "resort to the backup policy when coverage is below average" in {}
}
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
@@ -167,6 +219,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
it should "work like normal shiftqueue when no invalidate" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.queue.deq.ready.poke(false.B)
c.io.allowShift.poke(true.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
@@ -215,6 +268,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
it should "work when enqueing and dequeueing simultaneously" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(true.B)
@@ -243,9 +297,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
// Checks the queue with allowShift held low: the head entry can still be
// dequeued, but the entry behind it does not become visible at the output
// until allowShift has been raised again.
it should "not shift entries when allowShift is false" in {
  test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
    val queue = c.io.queue
    c.io.invalidate.valid.poke(false.B)
    queue.deq.ready.poke(false.B)
    c.io.allowShift.poke(false.B)

    // prepare: enqueue 0x12 and then 0x34, one entry per cycle
    for (value <- Seq(0x12, 0x34)) {
      queue.enq.ready.expect(true.B)
      queue.enq.valid.poke(true.B)
      queue.enq.bits.poke(value.U)
      c.clock.step()
    }
    queue.enq.valid.poke(false.B)

    // the head can still be dequeued while allowShift is false...
    queue.deq.ready.poke(true.B)
    queue.deq.valid.expect(true.B)
    queue.deq.bits.expect(0x12.U)
    c.clock.step()

    // ...but the next entry must not show up at the output afterwards
    queue.deq.ready.poke(true.B)
    queue.deq.valid.expect(false.B)
    c.clock.step()

    // once allowShift is back on, dequeueing works again starting from the
    // following cycle
    c.io.allowShift.poke(true.B)
    c.clock.step()
    queue.deq.ready.poke(true.B)
    queue.deq.valid.expect(true.B)
    queue.deq.bits.expect(0x34.U)
  }
}
it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in {
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(true.B)
@@ -282,9 +374,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
it should "invalidate head being dequeued" in {
// Single-entry queue: fill it, invalidate the only entry while shifting is
// disabled, then check that a fresh entry can be enqueued and is the one that
// comes out.
it should "work when invalidating and enqueueing to a depth=1 queue" in {
  test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
    val queue = c.io.queue
    val invalidate = c.io.invalidate
    val allowShift = c.io.allowShift

    invalidate.valid.poke(false.B)
    allowShift.poke(true.B)
    // the consumer is not dequeueing during setup
    queue.deq.ready.poke(false.B)

    // prepare: put 0x12 into the only slot
    queue.enq.ready.expect(true.B)
    queue.enq.valid.poke(true.B)
    queue.enq.bits.poke(0x12.U)
    c.clock.step()

    // invalidate that entry while shifting is not allowed
    allowShift.poke(false.B)
    invalidate.valid.poke(true.B)
    invalidate.bits.poke(0x1.U)
    // TODO: enqueueing into a full depth=1 queue whose only entry was just
    // invalidated could arguably be accepted (enq.ready true here), but that
    // is a niche case
    queue.enq.ready.expect(false.B)
    c.clock.step()

    // the slot is free now, so a new enqueue must be accepted
    allowShift.poke(true.B)
    invalidate.valid.poke(false.B)
    queue.enq.ready.expect(true.B)
    queue.enq.valid.poke(true.B)
    queue.enq.bits.poke(0x34.U)
    queue.deq.valid.expect(false.B)
    c.clock.step()

    // the new entry should be at the output in the next cycle
    queue.enq.valid.poke(false.B)
    queue.deq.ready.poke(true.B)
    queue.deq.valid.expect(true.B)
    queue.deq.bits.expect(0x34.U)
  }
}
it should "invalidate head that is also being dequeued" in {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(false.B)
@@ -300,12 +430,11 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.queue.enq.valid.poke(false.B)
// invalidate should work for the head just being dequeued at the same
// cycle. However, it should not change deq.valid right away to avoid
// combinational cycles (see definition).
// cycle
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x1.U)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.valid.expect(false.B)
c.clock.step()
// 0x12 should have been dequeued
c.io.invalidate.valid.poke(false.B)
@@ -315,10 +444,12 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
it should "dequeue invalidated entries by itself" in {
it should "dequeue invalidated head on its own when allowShift" in {
test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
@@ -338,19 +469,33 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
// invalidate two entries at head
c.io.invalidate.valid.poke(true.B)
c.io.invalidate.bits.poke(0x3.U)
c.io.queue.deq.ready.poke(false.B)
// [ 0x56 | 0x34(inv) | 0x12(inv) ]
c.clock.step()
// [ 0x56 | 0x34(inv) ]
// [ 0x56 | 0x34(inv) ]
c.io.invalidate.valid.poke(false.B)
c.io.queue.deq.ready.poke(false.B)
c.clock.step()
// [ 0x56 ]
// [ 0x56 ]
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x56.U)
c.clock.step()
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(false.B)
c.clock.step()
// do one more enqueue-then-dequeue to see if used bit was properly cleared
c.io.queue.deq.ready.poke(false.B)
c.io.queue.enq.ready.expect(true.B)
c.io.queue.enq.valid.poke(true.B)
c.io.queue.enq.bits.poke(0x78.U)
c.clock.step()
// should dequeue right away
c.io.queue.enq.valid.poke(false.B)
c.io.queue.deq.ready.poke(true.B)
c.io.queue.deq.valid.expect(true.B)
c.io.queue.deq.bits.expect(0x78.U)
}
}
@@ -358,6 +503,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
c.io.invalidate.valid.poke(false.B)
c.io.invalidate.bits.poke(0.U)
c.io.allowShift.poke(true.B)
// prepare
c.io.queue.deq.ready.poke(false.B)
@@ -383,24 +529,23 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
}
}
// Coalescer configuration shared by the uncoalescer tests below.
// Only parameters that CoalescerConfig currently declares are passed, each of
// them exactly once: there is no `maxSize`, and the coalescing sizes are given
// through `coalLogSizes`.
object uncoalescerTestConfig extends CoalescerConfig(
  numLanes = 4,          // lanes (threads) per warp
  queueDepth = 2,        // request window per lane
  waitTimeout = 8,       // max cycles to wait before forced fifo dequeue
  addressWidth = 24,
  dataBusWidth = 5,      // NOTE(review): presumably a log2 width -- confirm
  // watermark = 2,
  wordSizeInBytes = 4,
  wordWidth = 2,         // log2(wordSizeInBytes)
  numOldSrcIds = 16,     // outstanding requests per lane, from processor
  numNewSrcIds = 4,      // outstanding coalesced requests
  respQueueDepth = 4,    // depth of the response fifo queues
  coalLogSizes = Seq(4), // NOTE(review): presumably log2 of the coalesced size -- confirm
  sizeEnum = DefaultInFlightTableSizeEnum
)
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "uncoalescer"
val numLanes = 4
val numPerLaneReqs = 2
@@ -410,8 +555,8 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
val coalDataWidth = 128
val numInflightCoalRequests = 4
it should "work" in {
test(new UncoalescingUnit(testConfig))
it should "work in general case" in {
test(new Uncoalescer(uncoalescerTestConfig))
// vcs helps with simulation time, but sometimes errors with
// "mutation occurred during iteration" java error
// .withAnnotations(Seq(VcsBackendAnnotation))
@@ -426,7 +571,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
c.io.newEntry.lanes(0).reqs(1).offset.poke(0.U)
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes
c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four)
c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B)
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
@@ -460,7 +605,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
// offset is counting from LSB
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
c.io.uncoalResps(0)(1).bits.data.expect(0xdeadbeefL.U)
c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U)
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U)
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
@@ -468,6 +613,67 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
}
}
// Every lane registers one valid request (slot 0) at word offset 1 and leaves
// slot 1 invalid; the coalesced response must then be fanned out so that each
// lane receives the same word with its own source id.
it should "uncoalesce when coalesced to the same word offset" in {
  // .withAnnotations(Seq(VcsBackendAnnotation)) is left disabled for this spec
  test(new Uncoalescer(uncoalescerTestConfig)) { c =>
    val sourceId = 0.U
    val four = c.io.newEntry.sizeEnumT.FOUR
    val laneIds = 0 until numLanes

    // record the coalesced request in the table: lane i uses source id i
    c.io.coalReqValid.poke(true.B)
    c.io.newEntry.source.poke(sourceId)
    laneIds.foreach { lane =>
      val reqs = c.io.newEntry.lanes(lane).reqs
      reqs(0).valid.poke(true.B)
      reqs(0).source.poke(lane.U)
      reqs(0).offset.poke(1.U)
      reqs(0).sizeEnum.poke(four)
      reqs(1).valid.poke(false.B)
    }

    c.clock.step()

    c.io.coalReqValid.poke(false.B)

    c.clock.step()

    // drive the coalesced response carrying a 128-bit payload
    c.io.coalResp.valid.poke(true.B)
    c.io.coalResp.bits.source.poke(sourceId)
    val upperHalf = BigInt(0x0123456789abcdefL)
    val lowerHalf = BigInt(0x5ca1ab1edeadbeefL)
    val lit = (upperHalf << 64) | lowerHalf
    c.io.coalResp.bits.data.poke(lit.U)

    // table lookup is combinational at the same cycle
    // offset is counting from LSB, so offset 1 selects 0x5ca1ab1e
    laneIds.foreach { lane =>
      val resps = c.io.uncoalResps(lane)
      resps(0).valid.expect(true.B)
      resps(0).bits.data.expect(0x5ca1ab1eL.U)
      resps(0).bits.source.expect(lane.U)
      resps(1).valid.expect(false.B)
    }
  }
}
}
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {