diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 71b108c..56c99aa 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -36,29 +36,29 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum { } case class CoalescerConfig( - numLanes: Int, // number of lanes (or threads) in a warp - maxSize: Int, // maximum burst size (64 bytes) - queueDepth: Int, // request window per lane - waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane - addressWidth: Int, // assume <= 32 - dataBusWidth: Int, // memory-side downstream TileLink data bus size - // this has to be at least larger than the word size for - // the coalescer to perform well - // watermark = 2, // minimum buffer occupancy to start coalescing - wordSizeInBytes: Int, // 32-bit system - wordWidth: Int, // log(WORD_SIZE) - numOldSrcIds: Int, // num of outstanding requests per lane, from processor - numNewSrcIds: Int, // num of outstanding coalesced requests - respQueueDepth: Int, // depth of the response fifo queues - coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers - // must be power of 2's - sizeEnum: InFlightTableSizeEnum -) + numLanes: Int, // number of lanes (or threads) in a warp + queueDepth: Int, // request window per lane + waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane + addressWidth: Int, // assume <= 32 + dataBusWidth: Int, // memory-side downstream TileLink data bus size + // this has to be at least larger than the word size for + // the coalescer to perform well + // watermark = 2, // minimum buffer occupancy to start coalescing + wordSizeInBytes: Int, // 32-bit system + wordWidth: Int, // log(WORD_SIZE) + numOldSrcIds: Int, // num of outstanding requests per lane, from processor + numNewSrcIds: Int, // num of outstanding coalesced requests + respQueueDepth: Int, // depth of the response fifo queues + 
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers + // each size is log(byteSize) + sizeEnum: InFlightTableSizeEnum, +) { + // maximum coalesced size + def maxCoalLogSize: Int = coalLogSizes.max +} object defaultConfig extends CoalescerConfig( numLanes = 4, - // TODO: bigger size - maxSize = 3, queueDepth = 1, waitTimeout = 8, addressWidth = 24, @@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig( numOldSrcIds = 16, numNewSrcIds = 4, respQueueDepth = 4, - coalSizes = Seq(3), + coalLogSizes = Seq(3), sizeEnum = DefaultInFlightTableSizeEnum ) @@ -153,10 +153,14 @@ class ReqSourceGen(sourceWidth: Int) extends Module { // A shift-register queue implementation that supports invalidating entries // and exposing queue contents as output IO. (TODO: support deadline) // Initially copied from freechips.rocketchip.util.ShiftQueue. -// If `pipe` is true, support enqueueing to a full queue when also dequeueing. +// The queue only shifts down when `allowShift` is given true. Dequeueing +// works normally, but if allowShift was false, the queue head will stay +// invalid after dequeueing. This option is added in order to synchronize the +// shifting of the queues between lanes to model the SIMD behavior. +// If `pipe` is true, support enqueueing to a full queue when head is being +// dequeued at the next cycle. 
// Software model: window.py -class CoalShiftQueue[T <: Data]( - gen: T, +class CoalShiftQueue[T <: Data]( gen: T, val entries: Int, pipe: Boolean = true, flow: Boolean = false @@ -164,6 +168,7 @@ class CoalShiftQueue[T <: Data]( val io = IO(new Bundle { val queue = new QueueIO(gen, entries) val invalidate = Input(Valid(UInt(entries.W))) + val allowShift = Input(Bool()) val mask = Output(UInt(entries.W)) val elts = Output(Vec(entries, gen)) // 'QueueIO' provides io.count, but we might not want to use it in the @@ -192,7 +197,7 @@ class CoalShiftQueue[T <: Data]( def paddedUsed = pad({ i: Int => used(i) }) def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i)) - val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0)) + val shift = io.allowShift && (used =/= 0.U) && (io.queue.deq.fire || !validAfterInv(0)) for (i <- 0 until entries) { val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1)) val wen = Mux( @@ -208,27 +213,28 @@ class CoalShiftQueue[T <: Data]( (io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1), (io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i) ) + // additionally, head entry should get invalidated when dequeue fired + // but queue didn't shift (e.g. because allowShift was false) + when (io.queue.deq.fire && !shift) { + valid(0) := false.B + } } when(io.queue.enq.fire) { - when(!io.queue.deq.fire) { + when(!shift) { used := (used << 1.U) | 1.U } - }.elsewhen(io.queue.deq.fire) { + }.elsewhen(shift) { used := used >> 1.U } io.queue.enq.ready := !valid(entries - 1) - // We don't want to invalidate deq.valid response right away even when - // io.invalidate(head) is true. - // Coalescing unit consumes queue head's validity, and produces its new - // validity. Deasserting deq.valid right away will result in a combinational - // cycle. 
- io.queue.deq.valid := valid(0) + io.queue.deq.valid := validAfterInv(0) io.queue.deq.bits := elts.head assert(!flow, "flow-through is not implemented") if (flow) { + // FIXME old code when(io.queue.enq.valid) { io.queue.deq.valid := true.B } when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits } } @@ -243,7 +249,7 @@ class CoalShiftQueue[T <: Data]( } // Software model: coalescer.py -class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], +class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], config: CoalescerConfig) extends Module { val io = IO(new Bundle { val window = Input(Vec(config.numLanes, windowT.io.cloneType)) @@ -251,8 +257,10 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W)) val baseAddr = Output(UInt(config.addressWidth.W)) val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W))) - val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W)) - val coverageHits = Output(UInt((1 << config.maxSize).W)) + // number of entries matched with this leader lane's head. 
+ // maximum is numLanes * queueDepth + val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W)) + val coverageHits = Output(UInt((1 << config.maxCoalLogSize).W)) }) }) @@ -277,14 +285,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], leadersValid(i), head.source, head.address) } } - - // debug assertions and prints when (leadersValid.reduce(_ || _)) { assert(testNoQueueDrift, "unexpected drift between lane request queues") - printQueueHeads + // printQueueHeads } - val size = coalSize + val size = coalLogSize val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = { (req0.op === req1.op) && @@ -294,18 +300,24 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], // Gives a 2-D table of Bools representing match at every queue entry, // for each lane (so 3-D in total). - val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) => - // TODO: match leader to only lanes >= leader idx - io.window.map { followerLane => - // compare leader's head against follower's every queue entry - (followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) => - canMatch(follower, followerValid, leader, leaderValid) + val matchTablePerLane = (leaders zip leadersValid).zipWithIndex + .map { case ((leader, leaderValid), leaderIndex) => + io.window.zipWithIndex.map { case (followerQueue, followerIndex) => + // compare leader's head against follower's every queue entry + (followerQueue.elts zip followerQueue.mask.asBools) + .map { case (follower, followerValid) => + // match leader to only followers at lanes >= leader idx + // this halves the number of comparators + if (followerIndex < leaderIndex) false.B + else canMatch(follower, followerValid, leader, leaderValid) + } } } - } // TODO: potentially expensive: popcount & adder - val matchCounts = 
matchTablePerLane.map(leader => leader.map(PopCount(_)).reduce(_ +& _)) + val matchCounts = matchTablePerLane.map(table => + table.map(PopCount(_)) // sum up each column + .reduce(_ +& _)) val canCoalesce = matchCounts.map(_ > 1.U) // TODO: potentially expensive @@ -323,6 +335,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], })(chosenLeaderIdx) val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx) + // coverage calculation + def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth) + // 2-D table flattened to 1-D + val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address))) + val valids = io.window.map(_.mask).flatMap(_.asBools) + // indicates whether each word in the coalesced chunk is accessed by any of the + // queue entries. e.g. if [ 1 1 1 1 ], all of the four words in the coalesced + // data has been accessed and we've reached 100% utilization. + val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target => + (offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _) + } + // debug prints when (leadersValid.reduce(_ || _)) { matchCounts.zipWithIndex.foreach { case (count, i) => @@ -334,14 +358,13 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry], printf("%d ", m) } printf("]\n") - } + printf("chosenMatchCount = %d\n", chosenMatchCount) - // coverage calculation - def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth) - val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address))) - val valids = io.window.map(_.mask).flatMap(_.asBools) - val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target => - (offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _) + printf("hits = [ ") + hits.foreach { m => + printf("%d ", m) + } + printf("]\n") } io.results.leaderIdx := chosenLeaderIdx @@ -354,19 +377,21 @@ class MonoCoalescer(coalSize: 
Int, windowT: CoalShiftQueue[ReqQueueEntry], // Software model: coalescer.py class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry, config: CoalescerConfig) extends Module { - val io = IO(new Bundle { + // coalescing window, connected to the contents of the request queues val window = Input(Vec(config.numLanes, windowT.io.cloneType)) - val outReq = DecoupledIO(coalReqT.cloneType) + // generated coalesced request + val coalReq = DecoupledIO(coalReqT.cloneType) + // invalidate signals going into each request queue's head val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W)))) }) - val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) + val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config))) coalescers.foreach(_.io.window := io.window) - def normalize(x: Seq[UInt]): Seq[UInt] = { - x.zip(config.coalSizes).map { case (hits, size) => - (hits << (config.maxSize - size).U).asUInt + def normalize(valPerSize: Seq[UInt]): Seq[UInt] = { + (valPerSize zip config.coalLogSizes).map { case (hits, size) => + (hits << (config.maxCoalLogSize - size).U).asUInt } } @@ -378,27 +403,40 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE }._2 } + // normalize to maximum coalescing size so that we can do fair comparisons + // between coalescing results of different sizes val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount)) val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits)) - val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W)) + val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W)) val chosenValid = Wire(Bool()) // minimum 25% coverage - val minCoverage = 1.max(1 << (config.maxSize - 4)) + val minCoverage = 1.max(1 << ((config.maxCoalLogSize - 2) - 2)) + when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) { - chosenIdx := argMax(normalizedHits) 
+ chosenSizeIdx := argMax(normalizedHits) chosenValid := true.B + printf("coalescing success by coverage policy\n") }.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) { - chosenIdx := argMax(normalizedMatches) + chosenSizeIdx := argMax(normalizedMatches) chosenValid := true.B + printf("coalescing success by matches policy\n") }.otherwise { - chosenIdx := DontCare + chosenSizeIdx := DontCare chosenValid := false.B } + def debugPolicyPrint() = { + printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount) + printf("normalizedMatches[0]=%d\n", normalizedMatches(0)) + printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits) + printf("normalizedHits[0]=%d\n", normalizedHits(0)) + printf("minCoverage=%d\n", minCoverage.U) + } + // create coalesced request - val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx) - val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx) + val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx) + val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx) // flatten requests and matches val flatReqs = io.window.flatMap(_.elts) @@ -411,8 +449,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE // note: this is word-level coalescing. 
if finer granularity is needed, need to modify code val numWords = (1.U << (chosenSize - config.wordWidth.U)).asUInt - val maxWords = 1 << (config.maxSize - config.wordWidth) - val addrMask = Wire(UInt(config.maxSize.W)) + val maxWords = 1 << (config.maxCoalLogSize - config.wordWidth) + val addrMask = Wire(UInt(config.maxCoalLogSize.W)) addrMask := (1.U << chosenSize).asUInt - 1.U val data = Wire(Vec(maxWords, UInt((config.wordSizeInBytes * 8).W))) @@ -420,7 +458,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE for (i <- 0 until maxWords) { val sel = flatReqs.zip(flatMatches).map { case (req, m) => - m && ((req.address(config.maxSize - 1, 0) & addrMask) === i.U) + m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U) } // TODO: SW uses priority encoder, not sure about behavior of MuxCase data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) => @@ -435,18 +473,20 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE } val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds))) - sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created + sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created - io.outReq.bits.source := sourceGen.io.id.bits - io.outReq.bits.mask := mask.asUInt - io.outReq.bits.data := data.asUInt - io.outReq.bits.size := chosenSize - io.outReq.bits.address := chosenBundle.baseAddr - io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op - io.outReq.valid := chosenValid && sourceGen.io.id.valid + val coalesceValid = chosenValid && sourceGen.io.id.valid + + io.coalReq.bits.source := sourceGen.io.id.bits + io.coalReq.bits.mask := mask.asUInt + io.coalReq.bits.data := data.asUInt + io.coalReq.bits.size := chosenSize + io.coalReq.bits.address := chosenBundle.baseAddr + io.coalReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op + 
io.coalReq.valid := coalesceValid io.invalidate.bits := chosenBundle.matchOH - io.invalidate.valid := io.outReq.fire // invalidate only when fire + io.invalidate.valid := io.coalReq.fire // invalidate only when fire dontTouch(io.invalidate) // debug @@ -471,7 +511,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth)) } - val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.addressWidth, config.maxSize) + val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.addressWidth, config.maxCoalLogSize) val coalescer = Module(new MultiCoalescer(reqQueues.head, coalReqT, config)) coalescer.io.window := reqQueues.map(_.io) @@ -511,20 +551,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready") reqQueue.io.queue.enq.valid := tlIn.a.valid reqQueue.io.queue.enq.bits := req - // TODO: deq.ready should respect downstream ready + // TODO: deq.ready should respect downstream arbiter reqQueue.io.queue.deq.ready := true.B + // invalidate queue entries that contain original core requests that got + // coalesced into a wider one reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane) reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid + reqQueue.io.allowShift := true.B + // NOTE: this relies on CoalShiftQueue's behavior combinationally + // deasserting deq.valid in the same cycle that the head invalidate + // signal goes up. 
tlOut.a.valid := reqQueue.io.queue.deq.valid tlOut.a.bits := reqQueue.io.queue.deq.bits.toTLA(edgeOut) } val (tlCoal, edgeCoal) = outer.coalescerNode.out(0) - tlCoal.a.valid := coalescer.io.outReq.valid - tlCoal.a.bits := coalescer.io.outReq.bits.toTLA(edgeCoal) - coalescer.io.outReq.ready := tlCoal.a.ready + tlCoal.a.valid := coalescer.io.coalReq.valid + tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal) + coalescer.io.coalReq.ready := tlCoal.a.ready tlCoal.b.ready := true.B tlCoal.c.valid := false.B // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below. @@ -541,7 +587,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // coalesced request. Upper bound is min(DEPTH, 2**sourceWidth). val numPerLaneReqs = config.queueDepth - val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.maxSize) + val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.maxCoalLogSize) val respQueues = Seq.tabulate(config.numLanes) { _ => Module( new MultiPortQueue( @@ -550,6 +596,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // requests that didn't get coalesced, and M is the maximum number of // single-lane requests that can go into a coalesced request. // (`numPerLaneReqs`). + // TODO: potentially expensive, because this generates more FFs. + // Rather than enqueueing all responses in a single cycle, consider + // enqueueing one by one (at the cost of possibly stalling downstream). 1 + numPerLaneReqs, // deq_lanes = 1 because we're serializing all responses to 1 port that // goes back to the core. 
@@ -566,7 +615,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends ) } val respQueueNoncoalPort = 0 - val respQueueCoalPortOffset = 1 + val respQueueUncoalPortOffset = 1 (outer.node.in zip outer.node.out).zipWithIndex.foreach { case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge @@ -645,51 +694,40 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // logic to generate the Inflight Entry into the uncoalescer, where it should be. // this also reduces top level clutter. - val offsetBits = 4 // FIXME hardcoded - // but the width of the size enum - val newEntry = Wire( - new InflightCoalReqTableEntry( - config.numLanes, - numPerLaneReqs, - sourceWidth, - offsetBits, - config.sizeEnum - ) - ) - println(s"=========== table sourceWidth: ${sourceWidth}") - // println(s"=========== table sizeEnumBits: ${newEntry.sizeEnumBits}") - newEntry.source := coalescer.io.outReq.bits.source + val uncoalescer = Module(new Uncoalescer(config)) + + val newEntry = Wire(uncoalescer.inflightTable.entryT) + newEntry.source := coalescer.io.coalReq.bits.source // TODO: richard to write table fill logic - // FIXME: this assertion used to say 1 << config.MAX_SIZE - // I changed this to say DATA BUS SIZE. 
We need another assertion - // to assert that MAX_SIZE is <= DATA_BUS_SIZE because we do not support - // multi-beat writes currently - assert( + assert (config.maxCoalLogSize <= config.dataBusWidth, + "multi-beat coalesced reads/writes are currently not supported") + assert ( tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8, - s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant" + s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant" + s" (${(1 << config.dataBusWidth) * 8})" ) - val origReqs = reqQueues.map(q => q.io.queue.deq.bits) - newEntry.lanes.foreach { l => - l.reqs.zipWithIndex.foreach { case (r, i) => - // TODO: this part needs the actual coalescing logic to work - r.valid := false.B - r.source := origReqs(i).source - r.offset := (origReqs(i).address % (1 << config.maxSize).U) >> config.wordWidth - r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size) + val reqQueueHeads = reqQueues.map(q => q.io.queue.deq.bits) + // Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the + // coalescer to every (numLanes * queueDepth) entry in the inflight table. 
+ (newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex + .foreach { case ((laneEntry, laneInv), lane) => + (laneEntry.reqs zip laneInv.asBools).zipWithIndex + .foreach { case ((reqEntry, inv), i) => + val req = reqQueues(lane).io.elts(i) + when ((coalescer.io.invalidate.valid && inv)) { + printf(s"coalescer: reqQueue(${lane})(${i}) got invalidated (source=%d)\n", req.source) + } + reqEntry.valid := (coalescer.io.invalidate.valid && inv) + reqEntry.source := req.source + reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordWidth) + reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size) + // TODO: load/store op + } } - } - newEntry.lanes(0).reqs(0).valid := true.B - newEntry.lanes(1).reqs(0).valid := true.B - newEntry.lanes(2).reqs(0).valid := true.B - newEntry.lanes(3).reqs(0).valid := true.B dontTouch(newEntry) - // Uncoalescer module uncoalesces responses back to each lane - val uncoalescer = Module(new UncoalescingUnit(config)) - - uncoalescer.io.coalReqValid := coalescer.io.outReq.valid + uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid uncoalescer.io.newEntry := newEntry // Cleanup: custom <>? uncoalescer.io.coalResp.valid := tlCoal.d.valid @@ -698,22 +736,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends tlCoal.d.ready := uncoalescer.io.coalResp.ready // Queue up synthesized uncoalesced responses into each lane's response queue - (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) => - lanes.zipWithIndex.foreach { case (resp, i) => + (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) => + perLaneResps.zipWithIndex.foreach { case (resp, i) => // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream // cache. This should ideally not happen though. 
assert( - q.io.enq(respQueueCoalPortOffset + i).ready, - s"respQueue: enq port for 0-th coalesced response is blocked" + q.io.enq(respQueueUncoalPortOffset + i).ready, + s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}" ) - q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid - q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits + q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid + q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits + // debug + // when (resp.valid) { + // printf(s"${i}-th uncoalesced response came back from lane ${lane}\n") + // } // dontTouch(q.io.enq(respQueueCoalPortOffset)) } } // Debug - dontTouch(coalescer.io.outReq) + dontTouch(coalescer.io.coalReq) val coalRespData = tlCoal.d.bits.data dontTouch(coalRespData) @@ -730,10 +772,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends // FIXME: overlaps with RespQueueEntry. Trait-ify class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle { val source = UInt(log2Ceil(config.numNewSrcIds).W) - val data = UInt((8 * (1 << config.maxSize)).W) + val data = UInt((8 * (1 << config.maxCoalLogSize)).W) } -class UncoalescingUnit(config: CoalescerConfig) extends Module { +class Uncoalescer(config: CoalescerConfig) extends Module { // notes to hansung: // val numLanes: Int, <-> config.NUM_LANES // val numPerLaneReqs: Int, <-> config.DEPTH @@ -833,19 +875,21 @@ class UncoalescingUnit(config: CoalescerConfig) extends Module { // split the coalesced response back to individual per-lane responses with the // right metadata. 
class InflightCoalReqTable(config: CoalescerConfig) extends Module { - val offsetBits = 4 // FIXME hardcoded - val sizeBits = 2 // FIXME hardcoded + val offsetBits = config.maxCoalLogSize - config.wordWidth // assumes word offset val entryT = new InflightCoalReqTableEntry( config.numLanes, config.queueDepth, log2Ceil(config.numOldSrcIds), - config.maxSize, + config.maxCoalLogSize, config.sizeEnum ) val entries = config.numNewSrcIds val sourceWidth = log2Ceil(config.numOldSrcIds) + println(s"=========== table sourceWidth: ${sourceWidth}") + println(s"=========== table sizeEnumBits: ${entryT.sizeEnumT.getWidth}") + val io = IO(new Bundle { val enq = Flipped(Decoupled(entryT)) // TODO: return actual stuff diff --git a/src/test/scala/coalescing/CoalescingUnitTest.scala b/src/test/scala/coalescing/CoalescingUnitTest.scala index 0bb5fad..29b8a2c 100644 --- a/src/test/scala/coalescing/CoalescingUnitTest.scala +++ b/src/test/scala/coalescing/CoalescingUnitTest.scala @@ -35,26 +35,46 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester { class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule { val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ => - TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters( - name = "processor-nodes", - sourceId = IdRange(0, testConfig.numOldSrcIds), -// requestFifo = true, - visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig) + TLClientNode( + Seq( + TLMasterPortParameters.v1( + Seq( + TLClientParameters( + name = "processor-nodes", + sourceId = IdRange(0, testConfig.numOldSrcIds), + visibility = Seq(AddressSet(0x0, 0xffffff)) + ) + ) + ) + ) + ) // 24 bit address space (TODO probably use testConfig) } val device = new SimpleDevice("dummy", Seq("dummy")) val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus val l2Nodes = Seq.tabulate(5) { _ => - TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters( - address = 
Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode - resources = device.reg, - regionType = RegionType.UNCACHED, - executable = true, - supportsGet = TransferSizes(1, beatBytes), - supportsPutFull = TransferSizes(1, beatBytes), - supportsPutPartial = TransferSizes(1, beatBytes), - supportsHint = TransferSizes(1, beatBytes), - fifoId = Some(0))), beatBytes))) + TLManagerNode( + Seq( + TLSlavePortParameters.v1( + Seq( + TLManagerParameters( + address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode + resources = device.reg, + regionType = RegionType.UNCACHED, + executable = true, + supportsArithmetic = TransferSizes(1, beatBytes), + supportsLogical = TransferSizes(1, beatBytes), + supportsGet = TransferSizes(1, beatBytes), + supportsPutFull = TransferSizes(1, beatBytes), + supportsPutPartial = TransferSizes(1, beatBytes), + supportsHint = TransferSizes(1, beatBytes), + fifoId = Some(0) + ) + ), + beatBytes + ) + ) + ) } val dut = LazyModule(new CoalescingUnit(testConfig)) @@ -81,84 +101,116 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI // val coalMasterNode = coal.coalescerNode.makeIOs() } +object testConfig extends CoalescerConfig( + numLanes = 4, + queueDepth = 1, + waitTimeout = 8, + addressWidth = 24, + dataBusWidth = 5, + // watermark = 2, + wordSizeInBytes = 4, + wordWidth = 2, + numOldSrcIds = 16, + numNewSrcIds = 4, + respQueueDepth = 4, + coalLogSizes = Seq(3), + sizeEnum = DefaultInFlightTableSizeEnum +) + class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "multi- and mono-coalescers" - it should "coalesce fully consecutive accesses at size 4, only once" in { - implicit val p: Parameters = Parameters.empty + implicit val p: Parameters = Parameters.empty - val tb = LazyModule(new DummyCoalescingUnitTB()) -// val outer = LazyModule(new CoalescingUnit(testConfig)) - - val coal = tb.dut - - test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, 
WriteFsdbAnnotation)) { c => - val nodes = c.coalIOs.map(_.head) -// val nodes = c.cpuNodesImp.map(_.out.head._1) -// val nodes = c.coal.node.in.map(_._1) -// val nodes = c.mitmNodesImp.map(_.in.head._1) - - def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = { - val node = nodes(idx) + def pokeA( + nodes: Seq[TLBundle], + idx: Int, + op: Int, + size: Int, + source: Int, + addr: Int, + mask: Int, + data: Int + ): Unit = { + val node = nodes(idx) // node.a.ready.expect(true.B) // FIXME: this fails currently - node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get) - node.a.bits.param.poke(0.U) - node.a.bits.size.poke(size.U) - node.a.bits.source.poke(source.U) - node.a.bits.address.poke(addr.U) - node.a.bits.mask.poke(mask.U) - node.a.bits.data.poke(data.U) - node.a.bits.corrupt.poke(false.B) - node.a.valid.poke(true.B) - } + node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get) + node.a.bits.param.poke(0.U) + node.a.bits.size.poke(size.U) + node.a.bits.source.poke(source.U) + node.a.bits.address.poke(addr.U) + node.a.bits.mask.poke(mask.U) + node.a.bits.data.poke(data.U) + node.a.bits.corrupt.poke(false.B) + node.a.valid.poke(true.B) + } - def unsetA(): Unit = { - nodes.foreach { node => - node.a.valid.poke(false.B) - } - } + def unsetA(nodes: Seq[TLBundle]): Unit = { + nodes.foreach { node => + node.a.valid.poke(false.B) + } + } - // always ready to take coalesced requests -// c.coalMasterNode.head.a.ready.poke(true.B) -// c.coal.module.coalescer.io.outReq.ready.poke(true.B) + // it should "coalesce fully consecutive accesses at size 4, only once" in { + // test(makeTb().module) + // .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) + // { c => + // println(s"coalIO length = ${c.coalIOs(0).length}") + // val nodes = c.coalIOs.map(_.head) +// // val nodes = c.cpuNodesImp.map(_.out.head._1) +// // val nodes = c.coal.node.in.map(_._1) +// 
// val nodes = c.mitmNodesImp.map(_.in.head._1) - pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111) - pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222) - pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333) - pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444) + // // always ready to take coalesced requests +// // c.coalMasterNode.head.a.ready.poke(true.B) +// // c.coal.module.coalescer.io.outReq.ready.poke(true.B) + + // pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111) + // pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222) + // pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333) + // pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444) + + // c.clock.step() + + // unsetA(nodes) + + // c.clock.step() + // c.clock.step() + // } + // } + + it should "coalesce identical addresses (stride of 0)" in { + test(LazyModule(new DummyCoalescingUnitTB()).module) + .withAnnotations(Seq(VcsBackendAnnotation)) + { c => + println(s"coalIO length = ${c.coalIOs(0).length}") + val nodes = c.coalIOs.map(_.head) + + pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x1111) + pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x2222) + pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333) + pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x4444) c.clock.step() - unsetA() + unsetA(nodes) c.clock.step() c.clock.step() } } - it should "coalesce strided accesses at size 6" in { + it should "coalesce strided accesses at size 6" in {} - } + it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {} - it should "coalesce the coalescable chunk and leave 2 
uncoalescable requests" in { + it should "not touch uncoalescable requests" in {} - } + it should "allow temporal coalescing when depth >=2" in {} - it should "not touch uncoalescable requests" in { + it should "select the most coverage mono-coalescer" in {} - } - - it should "allow temporal coalescing when depth >=2" in { - - } - - it should "select the most coverage mono-coalescer" in { - - } - - it should "resort to the backup policy when coverage is below average" in { - - } + it should "resort to the backup policy when coverage is below average" in {} } class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { @@ -167,6 +219,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work like normal shiftqueue when no invalidate" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.queue.deq.ready.poke(false.B) + c.io.allowShift.poke(true.B) c.io.queue.enq.ready.expect(true.B) c.io.queue.enq.valid.poke(true.B) @@ -215,6 +268,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { it should "work when enqueing and dequeueing simultaneously" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(true.B) @@ -243,9 +297,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { } } - it should "work when enqueing and dequeueing simultaneously to a full queue" in { + it should "not shift entries when allowShift is false" in { + test(new CoalShiftQueue(UInt(8.W), 4)) { c => + c.io.invalidate.valid.poke(false.B) + c.io.queue.deq.ready.poke(false.B) + + c.io.allowShift.poke(false.B) + + // prepare + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) + c.clock.step() + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x34.U) + c.clock.step() + 
c.io.queue.enq.valid.poke(false.B) + + // dequeueing should work normally when allowShift is false... + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x12.U) + c.clock.step() + // but should stop there and not dequeue the next entry + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(false.B) + c.clock.step() + // when allowShift is back on, dequeueing should start working from next + // cycle + c.io.allowShift.poke(true.B) + c.clock.step() + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) + } + } + + it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in { test(new CoalShiftQueue(UInt(8.W), 1)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(true.B) @@ -282,9 +374,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { } } - it should "invalidate head being dequeued" in { + it should "work when invalidating and enqueueing to a depth=1 queue" in { + test(new CoalShiftQueue(UInt(8.W), 1)) { c => + c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) + // no dequeueing + c.io.queue.deq.ready.poke(false.B) + + // prepare + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x12.U) + c.clock.step() + // invalidate, but don't allow shift + c.io.allowShift.poke(false.B) + c.io.invalidate.valid.poke(true.B) + c.io.invalidate.bits.poke(0x1.U) + // TODO: we might be able to enqueue to a full depth=1 queue whose only + // entry just got invalidated, so that enq.ready is true here, but + // it is a niche case + c.io.queue.enq.ready.expect(false.B) + c.clock.step() + // now try enqueueing now that we have space + c.io.allowShift.poke(true.B) + c.io.invalidate.valid.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) +
c.io.queue.enq.bits.poke(0x34.U) + c.io.queue.deq.valid.expect(false.B) + c.clock.step() + // see if it comes out right next cycle + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x34.U) + } + } + + it should "invalidate head that is also being dequeued" in { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) @@ -300,12 +430,11 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { c.io.queue.enq.valid.poke(false.B) // invalidate should work for the head just being dequeued at the same - // cycle. However, it should not change deq.valid right away to avoid - // combinational cycles (see definition). + // cycle c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x1.U) c.io.queue.deq.ready.poke(true.B) - c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.valid.expect(false.B) c.clock.step() // 0x12 should have been dequeued c.io.invalidate.valid.poke(false.B) @@ -315,10 +444,12 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { } } - it should "dequeue invalidated entries by itself" in { + it should "dequeue invalidated head on its own when allowShift" in { test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c => c.io.invalidate.valid.poke(false.B) + c.io.allowShift.poke(true.B) + // prepare c.io.queue.deq.ready.poke(false.B) c.io.queue.enq.ready.expect(true.B) @@ -338,19 +469,33 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { // invalidate two entries at head c.io.invalidate.valid.poke(true.B) c.io.invalidate.bits.poke(0x3.U) + c.io.queue.deq.ready.poke(false.B) // [ 0x56 | 0x34(inv) | 0x12(inv) ] c.clock.step() - // [ 0x56 | 0x34(inv) ] + // [ 0x56 | 0x34(inv) ] c.io.invalidate.valid.poke(false.B) c.io.queue.deq.ready.poke(false.B) c.clock.step() - // [ 0x56 ] + // [ 0x56 ] 
c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(true.B) c.io.queue.deq.bits.expect(0x56.U) c.clock.step() c.io.queue.deq.ready.poke(true.B) c.io.queue.deq.valid.expect(false.B) + c.clock.step() + + // do one more enqueue-then-dequeue to see if used bit was properly cleared + c.io.queue.deq.ready.poke(false.B) + c.io.queue.enq.ready.expect(true.B) + c.io.queue.enq.valid.poke(true.B) + c.io.queue.enq.bits.poke(0x78.U) + c.clock.step() + // should dequeue right away + c.io.queue.enq.valid.poke(false.B) + c.io.queue.deq.ready.poke(true.B) + c.io.queue.deq.valid.expect(true.B) + c.io.queue.deq.bits.expect(0x78.U) } } @@ -358,6 +503,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { test(new CoalShiftQueue(UInt(8.W), 4)) { c => c.io.invalidate.valid.poke(false.B) c.io.invalidate.bits.poke(0.U) + c.io.allowShift.poke(true.B) // prepare c.io.queue.deq.ready.poke(false.B) @@ -383,24 +529,23 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester { } } -object testConfig extends CoalescerConfig( - maxSize = 5, +object uncoalescerTestConfig extends CoalescerConfig( + numLanes = 4, queueDepth = 2, waitTimeout = 8, addressWidth = 24, dataBusWidth = 5, - numLanes = 4, // watermark = 2, wordSizeInBytes = 4, wordWidth = 2, numOldSrcIds = 16, numNewSrcIds = 4, respQueueDepth = 4, - coalSizes = Seq(4, 5), + coalLogSizes = Seq(4), sizeEnum = DefaultInFlightTableSizeEnum ) -class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { +class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester { behavior of "uncoalescer" val numLanes = 4 val numPerLaneReqs = 2 @@ -410,8 +555,8 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { val coalDataWidth = 128 val numInflightCoalRequests = 4 - it should "work" in { - test(new UncoalescingUnit(testConfig)) + it should "work in general case" in { + test(new Uncoalescer(uncoalescerTestConfig)) // vcs helps with simulation time, but 
sometimes errors with // "mutation occurred during iteration" java error // .withAnnotations(Seq(VcsBackendAnnotation)) @@ -426,7 +571,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B) c.io.newEntry.lanes(0).reqs(1).source.poke(2.U) - c.io.newEntry.lanes(0).reqs(1).offset.poke(0.U) + c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four) c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B) c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) @@ -460,7 +605,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { // offset is counting from LSB c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(0).bits.source.expect(1.U) - c.io.uncoalResps(0)(1).bits.data.expect(0xdeadbeefL.U) + c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(1).bits.source.expect(2.U) c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U) c.io.uncoalResps(2)(0).bits.source.expect(2.U) @@ -468,6 +613,67 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { c.io.uncoalResps(2)(1).bits.source.expect(2.U) } } + + it should "uncoalesce when coalesced to the same word offset" in { + test(new Uncoalescer(uncoalescerTestConfig)) + // .withAnnotations(Seq(VcsBackendAnnotation)) + { c => + val sourceId = 0.U + val four = c.io.newEntry.sizeEnumT.FOUR + c.io.coalReqValid.poke(true.B) + c.io.newEntry.source.poke(sourceId) + c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) + c.io.newEntry.lanes(0).reqs(0).source.poke(0.U) + c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) + c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four) + c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B) + c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B) + c.io.newEntry.lanes(1).reqs(0).source.poke(1.U) + 
c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U) + c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four) + c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B) + c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) + c.io.newEntry.lanes(2).reqs(0).source.poke(2.U) + c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U) + c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four) + c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B) + c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B) + c.io.newEntry.lanes(3).reqs(0).source.poke(3.U) + c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U) + c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four) + c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B) + + c.clock.step() + + c.io.coalReqValid.poke(false.B) + + c.clock.step() + + c.io.coalResp.valid.poke(true.B) + c.io.coalResp.bits.source.poke(sourceId) + val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) + c.io.coalResp.bits.data.poke(lit.U) + + // table lookup is combinational at the same cycle + // offset is counting from LSB + c.io.uncoalResps(0)(0).valid.expect(true.B) + c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(0)(0).bits.source.expect(0.U) + c.io.uncoalResps(0)(1).valid.expect(false.B) + c.io.uncoalResps(1)(0).valid.expect(true.B) + c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(1)(0).bits.source.expect(1.U) + c.io.uncoalResps(1)(1).valid.expect(false.B) + c.io.uncoalResps(2)(0).valid.expect(true.B) + c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(2)(0).bits.source.expect(2.U) + c.io.uncoalResps(2)(1).valid.expect(false.B) + c.io.uncoalResps(3)(0).valid.expect(true.B) + c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U) + c.io.uncoalResps(3)(0).bits.source.expect(3.U) + c.io.uncoalResps(3)(1).valid.expect(false.B) + } + } } class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {