Merge branch 'graphics' of https://github.com/hansungk/rocket-chip into graphics
This commit is contained in:
@@ -36,29 +36,29 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
|
||||
}
|
||||
|
||||
case class CoalescerConfig(
|
||||
numLanes: Int, // number of lanes (or threads) in a warp
|
||||
maxSize: Int, // maximum burst size (64 bytes)
|
||||
queueDepth: Int, // request window per lane
|
||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||
addressWidth: Int, // assume <= 32
|
||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
||||
// this has to be at least larger than the word size for
|
||||
// the coalescer to perform well
|
||||
// watermark = 2, // minimum buffer occupancy to start coalescing
|
||||
wordSizeInBytes: Int, // 32-bit system
|
||||
wordWidth: Int, // log(WORD_SIZE)
|
||||
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
||||
numNewSrcIds: Int, // num of outstanding coalesced requests
|
||||
respQueueDepth: Int, // depth of the response fifo queues
|
||||
coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||
// must be powers of 2
|
||||
sizeEnum: InFlightTableSizeEnum
|
||||
)
|
||||
numLanes: Int, // number of lanes (or threads) in a warp
|
||||
queueDepth: Int, // request window per lane
|
||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||
addressWidth: Int, // assume <= 32
|
||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
||||
// this has to be at least larger than the word size for
|
||||
// the coalescer to perform well
|
||||
// watermark = 2, // minimum buffer occupancy to start coalescing
|
||||
wordSizeInBytes: Int, // 32-bit system
|
||||
wordWidth: Int, // log(WORD_SIZE)
|
||||
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
||||
numNewSrcIds: Int, // num of outstanding coalesced requests
|
||||
respQueueDepth: Int, // depth of the response fifo queues
|
||||
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||
// each size is log(byteSize)
|
||||
sizeEnum: InFlightTableSizeEnum,
|
||||
) {
|
||||
// maximum coalesced size
|
||||
def maxCoalLogSize: Int = coalLogSizes.max
|
||||
}
|
||||
|
||||
object defaultConfig extends CoalescerConfig(
|
||||
numLanes = 4,
|
||||
// TODO: bigger size
|
||||
maxSize = 3,
|
||||
queueDepth = 1,
|
||||
waitTimeout = 8,
|
||||
addressWidth = 24,
|
||||
@@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig(
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalSizes = Seq(3),
|
||||
coalLogSizes = Seq(3),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
|
||||
@@ -153,10 +153,14 @@ class ReqSourceGen(sourceWidth: Int) extends Module {
|
||||
// A shift-register queue implementation that supports invalidating entries
|
||||
// and exposing queue contents as output IO. (TODO: support deadline)
|
||||
// Initially copied from freechips.rocketchip.util.ShiftQueue.
|
||||
// If `pipe` is true, support enqueueing to a full queue when also dequeueing.
|
||||
// The queue only shifts down when `allowShift` is given true. Dequeueing
|
||||
// works normally, but if allowShift was false, the queue head will stay
|
||||
// invalid after dequeueing. This option is added in order to synchronize the
|
||||
// shifting of the queues between lanes to model the SIMD behavior.
|
||||
// If `pipe` is true, support enqueueing to a full queue when head is being
|
||||
// dequeued at the next cycle.
|
||||
// Software model: window.py
|
||||
class CoalShiftQueue[T <: Data](
|
||||
gen: T,
|
||||
class CoalShiftQueue[T <: Data]( gen: T,
|
||||
val entries: Int,
|
||||
pipe: Boolean = true,
|
||||
flow: Boolean = false
|
||||
@@ -164,6 +168,7 @@ class CoalShiftQueue[T <: Data](
|
||||
val io = IO(new Bundle {
|
||||
val queue = new QueueIO(gen, entries)
|
||||
val invalidate = Input(Valid(UInt(entries.W)))
|
||||
val allowShift = Input(Bool())
|
||||
val mask = Output(UInt(entries.W))
|
||||
val elts = Output(Vec(entries, gen))
|
||||
// 'QueueIO' provides io.count, but we might not want to use it in the
|
||||
@@ -192,7 +197,7 @@ class CoalShiftQueue[T <: Data](
|
||||
def paddedUsed = pad({ i: Int => used(i) })
|
||||
def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i))
|
||||
|
||||
val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0))
|
||||
val shift = io.allowShift && (used =/= 0.U) && (io.queue.deq.fire || !validAfterInv(0))
|
||||
for (i <- 0 until entries) {
|
||||
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
|
||||
val wen = Mux(
|
||||
@@ -208,27 +213,28 @@ class CoalShiftQueue[T <: Data](
|
||||
(io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
|
||||
(io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i)
|
||||
)
|
||||
// additionally, head entry should get invalidated when dequeue fired
|
||||
// but queue didn't shift (e.g. because allowShift was false)
|
||||
when (io.queue.deq.fire && !shift) {
|
||||
valid(0) := false.B
|
||||
}
|
||||
}
|
||||
|
||||
when(io.queue.enq.fire) {
|
||||
when(!io.queue.deq.fire) {
|
||||
when(!shift) {
|
||||
used := (used << 1.U) | 1.U
|
||||
}
|
||||
}.elsewhen(io.queue.deq.fire) {
|
||||
}.elsewhen(shift) {
|
||||
used := used >> 1.U
|
||||
}
|
||||
|
||||
io.queue.enq.ready := !valid(entries - 1)
|
||||
// We don't want to invalidate deq.valid response right away even when
|
||||
// io.invalidate(head) is true.
|
||||
// Coalescing unit consumes queue head's validity, and produces its new
|
||||
// validity. Deasserting deq.valid right away will result in a combinational
|
||||
// cycle.
|
||||
io.queue.deq.valid := valid(0)
|
||||
io.queue.deq.valid := validAfterInv(0)
|
||||
io.queue.deq.bits := elts.head
|
||||
|
||||
assert(!flow, "flow-through is not implemented")
|
||||
if (flow) {
|
||||
// FIXME old code
|
||||
when(io.queue.enq.valid) { io.queue.deq.valid := true.B }
|
||||
when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits }
|
||||
}
|
||||
@@ -243,7 +249,7 @@ class CoalShiftQueue[T <: Data](
|
||||
}
|
||||
|
||||
// Software model: coalescer.py
|
||||
class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
config: CoalescerConfig) extends Module {
|
||||
val io = IO(new Bundle {
|
||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||
@@ -251,8 +257,10 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
||||
val baseAddr = Output(UInt(config.addressWidth.W))
|
||||
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
|
||||
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W))
|
||||
val coverageHits = Output(UInt((1 << config.maxSize).W))
|
||||
// number of entries matched with this leader lane's head.
|
||||
// maximum is numLanes * queueDepth
|
||||
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W))
|
||||
val coverageHits = Output(UInt((1 << config.maxCoalLogSize).W))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -277,14 +285,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
leadersValid(i), head.source, head.address)
|
||||
}
|
||||
}
|
||||
|
||||
// debug assertions and prints
|
||||
when (leadersValid.reduce(_ || _)) {
|
||||
assert(testNoQueueDrift, "unexpected drift between lane request queues")
|
||||
printQueueHeads
|
||||
// printQueueHeads
|
||||
}
|
||||
|
||||
val size = coalSize
|
||||
val size = coalLogSize
|
||||
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
||||
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
||||
(req0.op === req1.op) &&
|
||||
@@ -294,18 +300,24 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
|
||||
// Gives a 2-D table of Bools representing match at every queue entry,
|
||||
// for each lane (so 3-D in total).
|
||||
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
|
||||
// TODO: match leader to only lanes >= leader idx
|
||||
io.window.map { followerLane =>
|
||||
// compare leader's head against follower's every queue entry
|
||||
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
|
||||
canMatch(follower, followerValid, leader, leaderValid)
|
||||
val matchTablePerLane = (leaders zip leadersValid).zipWithIndex
|
||||
.map { case ((leader, leaderValid), leaderIndex) =>
|
||||
io.window.zipWithIndex.map { case (followerQueue, followerIndex) =>
|
||||
// compare leader's head against follower's every queue entry
|
||||
(followerQueue.elts zip followerQueue.mask.asBools)
|
||||
.map { case (follower, followerValid) =>
|
||||
// match leader to only followers at lanes >= leader idx
|
||||
// this halves the number of comparators
|
||||
if (followerIndex < leaderIndex) false.B
|
||||
else canMatch(follower, followerValid, leader, leaderValid)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: potentially expensive: popcount & adder
|
||||
val matchCounts = matchTablePerLane.map(leader => leader.map(PopCount(_)).reduce(_ +& _))
|
||||
val matchCounts = matchTablePerLane.map(table =>
|
||||
table.map(PopCount(_)) // sum up each column
|
||||
.reduce(_ +& _))
|
||||
val canCoalesce = matchCounts.map(_ > 1.U)
|
||||
|
||||
// TODO: potentially expensive
|
||||
@@ -323,6 +335,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
})(chosenLeaderIdx)
|
||||
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
||||
|
||||
// coverage calculation
|
||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||
// 2-D table flattened to 1-D
|
||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||
// indicates whether each word in the coalesced chunk is accessed by any of the
|
||||
// queue entries. e.g. if [ 1 1 1 1 ], all of the four words in the coalesced
|
||||
// data have been accessed and we've reached 100% utilization.
|
||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||
}
|
||||
|
||||
// debug prints
|
||||
when (leadersValid.reduce(_ || _)) {
|
||||
matchCounts.zipWithIndex.foreach { case (count, i) =>
|
||||
@@ -334,14 +358,13 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
printf("%d ", m)
|
||||
}
|
||||
printf("]\n")
|
||||
}
|
||||
printf("chosenMatchCount = %d\n", chosenMatchCount)
|
||||
|
||||
// coverage calculation
|
||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||
printf("hits = [ ")
|
||||
hits.foreach { m =>
|
||||
printf("%d ", m)
|
||||
}
|
||||
printf("]\n")
|
||||
}
|
||||
|
||||
io.results.leaderIdx := chosenLeaderIdx
|
||||
@@ -354,19 +377,21 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||
// Software model: coalescer.py
|
||||
class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry,
|
||||
config: CoalescerConfig) extends Module {
|
||||
|
||||
val io = IO(new Bundle {
|
||||
// coalescing window, connected to the contents of the request queues
|
||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||
val outReq = DecoupledIO(coalReqT.cloneType)
|
||||
// generated coalesced request
|
||||
val coalReq = DecoupledIO(coalReqT.cloneType)
|
||||
// invalidate signals going into each request queue's head
|
||||
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
|
||||
})
|
||||
|
||||
val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
||||
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
||||
coalescers.foreach(_.io.window := io.window)
|
||||
|
||||
def normalize(x: Seq[UInt]): Seq[UInt] = {
|
||||
x.zip(config.coalSizes).map { case (hits, size) =>
|
||||
(hits << (config.maxSize - size).U).asUInt
|
||||
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
|
||||
(valPerSize zip config.coalLogSizes).map { case (hits, size) =>
|
||||
(hits << (config.maxCoalLogSize - size).U).asUInt
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,27 +403,40 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
}._2
|
||||
}
|
||||
|
||||
// normalize to maximum coalescing size so that we can do fair comparisons
|
||||
// between coalescing results of different sizes
|
||||
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
|
||||
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
|
||||
|
||||
val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W))
|
||||
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
|
||||
val chosenValid = Wire(Bool())
|
||||
// minimum 25% coverage
|
||||
val minCoverage = 1.max(1 << (config.maxSize - 4))
|
||||
val minCoverage = 1.max(1 << ((config.maxCoalLogSize - 2) - 2))
|
||||
|
||||
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
|
||||
chosenIdx := argMax(normalizedHits)
|
||||
chosenSizeIdx := argMax(normalizedHits)
|
||||
chosenValid := true.B
|
||||
printf("coalescing success by coverage policy\n")
|
||||
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
|
||||
chosenIdx := argMax(normalizedMatches)
|
||||
chosenSizeIdx := argMax(normalizedMatches)
|
||||
chosenValid := true.B
|
||||
printf("coalescing success by matches policy\n")
|
||||
}.otherwise {
|
||||
chosenIdx := DontCare
|
||||
chosenSizeIdx := DontCare
|
||||
chosenValid := false.B
|
||||
}
|
||||
|
||||
// Debug helper for the size-selection policy. Prints, for the coalescer at
// index 0 only, its raw matchCount and coverageHits outputs together with
// their normalized counterparts, followed by the minCoverage threshold the
// coverage policy compares against. Other coalescer sizes are not printed.
def debugPolicyPrint() = {
|
||||
printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount)
|
||||
printf("normalizedMatches[0]=%d\n", normalizedMatches(0))
|
||||
printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits)
|
||||
printf("normalizedHits[0]=%d\n", normalizedHits(0))
|
||||
// minCoverage is a Scala Int; it is converted to a hardware literal for printf
|
||||
printf("minCoverage=%d\n", minCoverage.U)
|
||||
}
|
||||
|
||||
// create coalesced request
|
||||
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx)
|
||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx)
|
||||
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx)
|
||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
|
||||
|
||||
// flatten requests and matches
|
||||
val flatReqs = io.window.flatMap(_.elts)
|
||||
@@ -411,8 +449,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
|
||||
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
|
||||
val numWords = (1.U << (chosenSize - config.wordWidth.U)).asUInt
|
||||
val maxWords = 1 << (config.maxSize - config.wordWidth)
|
||||
val addrMask = Wire(UInt(config.maxSize.W))
|
||||
val maxWords = 1 << (config.maxCoalLogSize - config.wordWidth)
|
||||
val addrMask = Wire(UInt(config.maxCoalLogSize.W))
|
||||
addrMask := (1.U << chosenSize).asUInt - 1.U
|
||||
|
||||
val data = Wire(Vec(maxWords, UInt((config.wordSizeInBytes * 8).W)))
|
||||
@@ -420,7 +458,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
|
||||
for (i <- 0 until maxWords) {
|
||||
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
|
||||
m && ((req.address(config.maxSize - 1, 0) & addrMask) === i.U)
|
||||
m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U)
|
||||
}
|
||||
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
|
||||
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
|
||||
@@ -435,18 +473,20 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
||||
}
|
||||
|
||||
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
||||
sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
|
||||
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
|
||||
|
||||
io.outReq.bits.source := sourceGen.io.id.bits
|
||||
io.outReq.bits.mask := mask.asUInt
|
||||
io.outReq.bits.data := data.asUInt
|
||||
io.outReq.bits.size := chosenSize
|
||||
io.outReq.bits.address := chosenBundle.baseAddr
|
||||
io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
|
||||
io.outReq.valid := chosenValid && sourceGen.io.id.valid
|
||||
val coalesceValid = chosenValid && sourceGen.io.id.valid
|
||||
|
||||
io.coalReq.bits.source := sourceGen.io.id.bits
|
||||
io.coalReq.bits.mask := mask.asUInt
|
||||
io.coalReq.bits.data := data.asUInt
|
||||
io.coalReq.bits.size := chosenSize
|
||||
io.coalReq.bits.address := chosenBundle.baseAddr
|
||||
io.coalReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
|
||||
io.coalReq.valid := coalesceValid
|
||||
|
||||
io.invalidate.bits := chosenBundle.matchOH
|
||||
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
||||
io.invalidate.valid := io.coalReq.fire // invalidate only when fire
|
||||
|
||||
dontTouch(io.invalidate) // debug
|
||||
|
||||
@@ -471,7 +511,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth))
|
||||
}
|
||||
|
||||
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.addressWidth, config.maxSize)
|
||||
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.addressWidth, config.maxCoalLogSize)
|
||||
val coalescer = Module(new MultiCoalescer(reqQueues.head, coalReqT, config))
|
||||
coalescer.io.window := reqQueues.map(_.io)
|
||||
|
||||
@@ -511,20 +551,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready")
|
||||
reqQueue.io.queue.enq.valid := tlIn.a.valid
|
||||
reqQueue.io.queue.enq.bits := req
|
||||
// TODO: deq.ready should respect downstream ready
|
||||
// TODO: deq.ready should respect downstream arbiter
|
||||
reqQueue.io.queue.deq.ready := true.B
|
||||
// invalidate queue entries that contain original core requests that got
|
||||
// coalesced into a wider one
|
||||
reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
|
||||
reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
|
||||
reqQueue.io.allowShift := true.B
|
||||
|
||||
// NOTE: this relies on CoalShiftQueue's behavior combinationally
|
||||
// deasserting deq.valid in the same cycle that the head invalidate
|
||||
// signal goes up.
|
||||
tlOut.a.valid := reqQueue.io.queue.deq.valid
|
||||
tlOut.a.bits := reqQueue.io.queue.deq.bits.toTLA(edgeOut)
|
||||
}
|
||||
|
||||
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
||||
|
||||
tlCoal.a.valid := coalescer.io.outReq.valid
|
||||
tlCoal.a.bits := coalescer.io.outReq.bits.toTLA(edgeCoal)
|
||||
coalescer.io.outReq.ready := tlCoal.a.ready
|
||||
tlCoal.a.valid := coalescer.io.coalReq.valid
|
||||
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
|
||||
coalescer.io.coalReq.ready := tlCoal.a.ready
|
||||
tlCoal.b.ready := true.B
|
||||
tlCoal.c.valid := false.B
|
||||
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
||||
@@ -541,7 +587,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
||||
val numPerLaneReqs = config.queueDepth
|
||||
|
||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.maxSize)
|
||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.maxCoalLogSize)
|
||||
val respQueues = Seq.tabulate(config.numLanes) { _ =>
|
||||
Module(
|
||||
new MultiPortQueue(
|
||||
@@ -550,6 +596,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
// requests that didn't get coalesced, and M is the maximum number of
|
||||
// single-lane requests that can go into a coalesced request.
|
||||
// (`numPerLaneReqs`).
|
||||
// TODO: potentially expensive, because this generates more FFs.
|
||||
// Rather than enqueueing all responses in a single cycle, consider
|
||||
// enqueueing one by one (at the cost of possibly stalling downstream).
|
||||
1 + numPerLaneReqs,
|
||||
// deq_lanes = 1 because we're serializing all responses to 1 port that
|
||||
// goes back to the core.
|
||||
@@ -566,7 +615,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
)
|
||||
}
|
||||
val respQueueNoncoalPort = 0
|
||||
val respQueueCoalPortOffset = 1
|
||||
val respQueueUncoalPortOffset = 1
|
||||
|
||||
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
|
||||
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
|
||||
@@ -645,51 +694,40 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
|
||||
// this also reduces top level clutter.
|
||||
|
||||
val offsetBits = 4 // FIXME hardcoded
|
||||
// but the width of the size enum
|
||||
val newEntry = Wire(
|
||||
new InflightCoalReqTableEntry(
|
||||
config.numLanes,
|
||||
numPerLaneReqs,
|
||||
sourceWidth,
|
||||
offsetBits,
|
||||
config.sizeEnum
|
||||
)
|
||||
)
|
||||
println(s"=========== table sourceWidth: ${sourceWidth}")
|
||||
// println(s"=========== table sizeEnumBits: ${newEntry.sizeEnumBits}")
|
||||
newEntry.source := coalescer.io.outReq.bits.source
|
||||
val uncoalescer = Module(new Uncoalescer(config))
|
||||
|
||||
val newEntry = Wire(uncoalescer.inflightTable.entryT)
|
||||
newEntry.source := coalescer.io.coalReq.bits.source
|
||||
|
||||
// TODO: richard to write table fill logic
|
||||
// FIXME: this assertion used to say 1 << config.MAX_SIZE
|
||||
// I changed this to say DATA BUS SIZE. We need another assertion
|
||||
// to assert that MAX_SIZE is <= DATA_BUS_SIZE because we do not support
|
||||
// multi-beat writes currently
|
||||
assert(
|
||||
assert (config.maxCoalLogSize <= config.dataBusWidth,
|
||||
"multi-beat coalesced reads/writes are currently not supported")
|
||||
assert (
|
||||
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
|
||||
s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant"
|
||||
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
|
||||
+ s" (${(1 << config.dataBusWidth) * 8})"
|
||||
)
|
||||
val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
|
||||
newEntry.lanes.foreach { l =>
|
||||
l.reqs.zipWithIndex.foreach { case (r, i) =>
|
||||
// TODO: this part needs the actual coalescing logic to work
|
||||
r.valid := false.B
|
||||
r.source := origReqs(i).source
|
||||
r.offset := (origReqs(i).address % (1 << config.maxSize).U) >> config.wordWidth
|
||||
r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size)
|
||||
val reqQueueHeads = reqQueues.map(q => q.io.queue.deq.bits)
|
||||
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
|
||||
// coalescer to every (numLanes * queueDepth) entry in the inflight table.
|
||||
(newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex
|
||||
.foreach { case ((laneEntry, laneInv), lane) =>
|
||||
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
|
||||
.foreach { case ((reqEntry, inv), i) =>
|
||||
val req = reqQueues(lane).io.elts(i)
|
||||
when ((coalescer.io.invalidate.valid && inv)) {
|
||||
printf(s"coalescer: reqQueue(${lane})(${i}) got invalidated (source=%d)\n", req.source)
|
||||
}
|
||||
reqEntry.valid := (coalescer.io.invalidate.valid && inv)
|
||||
reqEntry.source := req.source
|
||||
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordWidth)
|
||||
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
|
||||
// TODO: load/store op
|
||||
}
|
||||
}
|
||||
}
|
||||
newEntry.lanes(0).reqs(0).valid := true.B
|
||||
newEntry.lanes(1).reqs(0).valid := true.B
|
||||
newEntry.lanes(2).reqs(0).valid := true.B
|
||||
newEntry.lanes(3).reqs(0).valid := true.B
|
||||
dontTouch(newEntry)
|
||||
|
||||
// Uncoalescer module uncoalesces responses back to each lane
|
||||
val uncoalescer = Module(new UncoalescingUnit(config))
|
||||
|
||||
uncoalescer.io.coalReqValid := coalescer.io.outReq.valid
|
||||
uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
|
||||
uncoalescer.io.newEntry := newEntry
|
||||
// Cleanup: custom <>?
|
||||
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
||||
@@ -698,22 +736,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
||||
|
||||
// Queue up synthesized uncoalesced responses into each lane's response queue
|
||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
||||
lanes.zipWithIndex.foreach { case (resp, i) =>
|
||||
(respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) =>
|
||||
perLaneResps.zipWithIndex.foreach { case (resp, i) =>
|
||||
// TODO: rather than crashing, deassert tlOut.d.ready to stall downstream
|
||||
// cache. This should ideally not happen though.
|
||||
assert(
|
||||
q.io.enq(respQueueCoalPortOffset + i).ready,
|
||||
s"respQueue: enq port for 0-th coalesced response is blocked"
|
||||
q.io.enq(respQueueUncoalPortOffset + i).ready,
|
||||
s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}"
|
||||
)
|
||||
q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
|
||||
q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
|
||||
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
|
||||
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
|
||||
// debug
|
||||
// when (resp.valid) {
|
||||
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
||||
// }
|
||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||
}
|
||||
}
|
||||
|
||||
// Debug
|
||||
dontTouch(coalescer.io.outReq)
|
||||
dontTouch(coalescer.io.coalReq)
|
||||
val coalRespData = tlCoal.d.bits.data
|
||||
dontTouch(coalRespData)
|
||||
|
||||
@@ -730,10 +772,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
||||
// FIXME: overlaps with RespQueueEntry. Trait-ify
|
||||
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
|
||||
val source = UInt(log2Ceil(config.numNewSrcIds).W)
|
||||
val data = UInt((8 * (1 << config.maxSize)).W)
|
||||
val data = UInt((8 * (1 << config.maxCoalLogSize)).W)
|
||||
}
|
||||
|
||||
class UncoalescingUnit(config: CoalescerConfig) extends Module {
|
||||
class Uncoalescer(config: CoalescerConfig) extends Module {
|
||||
// notes to hansung:
|
||||
// val numLanes: Int, <-> config.NUM_LANES
|
||||
// val numPerLaneReqs: Int, <-> config.DEPTH
|
||||
@@ -833,19 +875,21 @@ class UncoalescingUnit(config: CoalescerConfig) extends Module {
|
||||
// split the coalesced response back to individual per-lane responses with the
|
||||
// right metadata.
|
||||
class InflightCoalReqTable(config: CoalescerConfig) extends Module {
|
||||
val offsetBits = 4 // FIXME hardcoded
|
||||
val sizeBits = 2 // FIXME hardcoded
|
||||
val offsetBits = config.maxCoalLogSize - config.wordWidth // assumes word offset
|
||||
val entryT = new InflightCoalReqTableEntry(
|
||||
config.numLanes,
|
||||
config.queueDepth,
|
||||
log2Ceil(config.numOldSrcIds),
|
||||
config.maxSize,
|
||||
config.maxCoalLogSize,
|
||||
config.sizeEnum
|
||||
)
|
||||
|
||||
val entries = config.numNewSrcIds
|
||||
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
||||
|
||||
println(s"=========== table sourceWidth: ${sourceWidth}")
|
||||
println(s"=========== table sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
||||
|
||||
val io = IO(new Bundle {
|
||||
val enq = Flipped(Decoupled(entryT))
|
||||
// TODO: return actual stuff
|
||||
|
||||
@@ -35,26 +35,46 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
||||
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
|
||||
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
|
||||
name = "processor-nodes",
|
||||
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||
// requestFifo = true,
|
||||
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
|
||||
TLClientNode(
|
||||
Seq(
|
||||
TLMasterPortParameters.v1(
|
||||
Seq(
|
||||
TLClientParameters(
|
||||
name = "processor-nodes",
|
||||
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||
visibility = Seq(AddressSet(0x0, 0xffffff))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
) // 24 bit address space (TODO probably use testConfig)
|
||||
}
|
||||
|
||||
val device = new SimpleDevice("dummy", Seq("dummy"))
|
||||
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
|
||||
val l2Nodes = Seq.tabulate(5) { _ =>
|
||||
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
|
||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||
resources = device.reg,
|
||||
regionType = RegionType.UNCACHED,
|
||||
executable = true,
|
||||
supportsGet = TransferSizes(1, beatBytes),
|
||||
supportsPutFull = TransferSizes(1, beatBytes),
|
||||
supportsPutPartial = TransferSizes(1, beatBytes),
|
||||
supportsHint = TransferSizes(1, beatBytes),
|
||||
fifoId = Some(0))), beatBytes)))
|
||||
TLManagerNode(
|
||||
Seq(
|
||||
TLSlavePortParameters.v1(
|
||||
Seq(
|
||||
TLManagerParameters(
|
||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||
resources = device.reg,
|
||||
regionType = RegionType.UNCACHED,
|
||||
executable = true,
|
||||
supportsArithmetic = TransferSizes(1, beatBytes),
|
||||
supportsLogical = TransferSizes(1, beatBytes),
|
||||
supportsGet = TransferSizes(1, beatBytes),
|
||||
supportsPutFull = TransferSizes(1, beatBytes),
|
||||
supportsPutPartial = TransferSizes(1, beatBytes),
|
||||
supportsHint = TransferSizes(1, beatBytes),
|
||||
fifoId = Some(0)
|
||||
)
|
||||
),
|
||||
beatBytes
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
val dut = LazyModule(new CoalescingUnit(testConfig))
|
||||
@@ -81,84 +101,116 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
||||
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
||||
}
|
||||
|
||||
// Coalescer configuration used by the test code in this file:
// DummyCoalescingUnitTB reads numLanes, numOldSrcIds and dataBusWidth from it
// and passes the whole object to the CoalescingUnit it instantiates.
object testConfig extends CoalescerConfig(
|
||||
numLanes = 4,
|
||||
queueDepth = 1,
|
||||
waitTimeout = 8,
|
||||
addressWidth = 24,
|
||||
// log2 of the bus width in bytes: the testbench computes beatBytes as
|
||||
// 1 << dataBusWidth, i.e. 32 bytes (256 bits)
|
||||
dataBusWidth = 5,
|
||||
// watermark = 2,
|
||||
wordSizeInBytes = 4,
|
||||
// log2 of the word size (1 << 2 == wordSizeInBytes)
|
||||
wordWidth = 2,
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
// single candidate coalescing size, given as log2 of the size in bytes
|
||||
coalLogSizes = Seq(3),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
|
||||
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "multi- and mono-coalescers"
|
||||
|
||||
it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||
implicit val p: Parameters = Parameters.empty
|
||||
implicit val p: Parameters = Parameters.empty
|
||||
|
||||
val tb = LazyModule(new DummyCoalescingUnitTB())
|
||||
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
||||
|
||||
val coal = tb.dut
|
||||
|
||||
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c =>
|
||||
val nodes = c.coalIOs.map(_.head)
|
||||
// val nodes = c.cpuNodesImp.map(_.out.head._1)
|
||||
// val nodes = c.coal.node.in.map(_._1)
|
||||
// val nodes = c.mitmNodesImp.map(_.in.head._1)
|
||||
|
||||
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
|
||||
val node = nodes(idx)
|
||||
def pokeA(
|
||||
nodes: Seq[TLBundle],
|
||||
idx: Int,
|
||||
op: Int,
|
||||
size: Int,
|
||||
source: Int,
|
||||
addr: Int,
|
||||
mask: Int,
|
||||
data: Int
|
||||
): Unit = {
|
||||
val node = nodes(idx)
|
||||
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||
node.a.bits.param.poke(0.U)
|
||||
node.a.bits.size.poke(size.U)
|
||||
node.a.bits.source.poke(source.U)
|
||||
node.a.bits.address.poke(addr.U)
|
||||
node.a.bits.mask.poke(mask.U)
|
||||
node.a.bits.data.poke(data.U)
|
||||
node.a.bits.corrupt.poke(false.B)
|
||||
node.a.valid.poke(true.B)
|
||||
}
|
||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||
node.a.bits.param.poke(0.U)
|
||||
node.a.bits.size.poke(size.U)
|
||||
node.a.bits.source.poke(source.U)
|
||||
node.a.bits.address.poke(addr.U)
|
||||
node.a.bits.mask.poke(mask.U)
|
||||
node.a.bits.data.poke(data.U)
|
||||
node.a.bits.corrupt.poke(false.B)
|
||||
node.a.valid.poke(true.B)
|
||||
}
|
||||
|
||||
def unsetA(): Unit = {
|
||||
nodes.foreach { node =>
|
||||
node.a.valid.poke(false.B)
|
||||
}
|
||||
}
|
||||
def unsetA(nodes: Seq[TLBundle]): Unit = {
|
||||
nodes.foreach { node =>
|
||||
node.a.valid.poke(false.B)
|
||||
}
|
||||
}
|
||||
|
||||
// always ready to take coalesced requests
|
||||
// c.coalMasterNode.head.a.ready.poke(true.B)
|
||||
// c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
||||
// it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||
// test(makeTb().module)
|
||||
// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
|
||||
// { c =>
|
||||
// println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||
// val nodes = c.coalIOs.map(_.head)
|
||||
// // val nodes = c.cpuNodesImp.map(_.out.head._1)
|
||||
// // val nodes = c.coal.node.in.map(_._1)
|
||||
// // val nodes = c.mitmNodesImp.map(_.in.head._1)
|
||||
|
||||
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
|
||||
pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222)
|
||||
pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333)
|
||||
pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444)
|
||||
// // always ready to take coalesced requests
|
||||
// // c.coalMasterNode.head.a.ready.poke(true.B)
|
||||
// // c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
||||
|
||||
// pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
|
||||
// pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
|
||||
// pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||
// pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
|
||||
|
||||
// c.clock.step()
|
||||
|
||||
// unsetA(nodes)
|
||||
|
||||
// c.clock.step()
|
||||
// c.clock.step()
|
||||
// }
|
||||
// }
|
||||
|
||||
it should "coalesce identical addresses (stride of 0)" in {
|
||||
test(LazyModule(new DummyCoalescingUnitTB()).module)
|
||||
.withAnnotations(Seq(VcsBackendAnnotation))
|
||||
{ c =>
|
||||
println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||
val nodes = c.coalIOs.map(_.head)
|
||||
|
||||
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x1111)
|
||||
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x2222)
|
||||
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x4444)
|
||||
|
||||
c.clock.step()
|
||||
|
||||
unsetA()
|
||||
unsetA(nodes)
|
||||
|
||||
c.clock.step()
|
||||
c.clock.step()
|
||||
}
|
||||
}
|
||||
|
||||
it should "coalesce strided accesses at size 6" in {
|
||||
it should "coalesce strided accesses at size 6" in {}
|
||||
|
||||
}
|
||||
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {}
|
||||
|
||||
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {
|
||||
it should "not touch uncoalescable requests" in {}
|
||||
|
||||
}
|
||||
it should "allow temporal coalescing when depth >=2" in {}
|
||||
|
||||
it should "not touch uncoalescable requests" in {
|
||||
it should "select the most coverage mono-coalescer" in {}
|
||||
|
||||
}
|
||||
|
||||
it should "allow temporal coalescing when depth >=2" in {
|
||||
|
||||
}
|
||||
|
||||
it should "select the most coverage mono-coalescer" in {
|
||||
|
||||
}
|
||||
|
||||
it should "resort to the backup policy when coverage is below average" in {
|
||||
|
||||
}
|
||||
it should "resort to the backup policy when coverage is below average" in {}
|
||||
}
|
||||
|
||||
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
@@ -167,6 +219,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
it should "work like normal shiftqueue when no invalidate" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
@@ -215,6 +268,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
it should "work when enqueing and dequeueing simultaneously" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
@@ -243,9 +297,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
|
||||
// Checks allowShift gating on a 4-entry queue: with allowShift deasserted the
// head entry can still be dequeued, but the following entry is not presented
// on deq until allowShift has been asserted again for a cycle.
it should "not shift entries when allowShift is false" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
|
||||
c.io.allowShift.poke(false.B)
|
||||
|
||||
// prepare: enqueue 0x12 then 0x34, one entry per cycle
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
c.clock.step()
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
|
||||
// dequeueing the head should work normally when allowShift is false...
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x12.U)
|
||||
c.clock.step()
|
||||
// ...but it should stop there and not dequeue the next entry
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
c.clock.step()
|
||||
// when allowShift is back on, dequeueing should start working from the next
|
||||
// cycle
|
||||
c.io.allowShift.poke(true.B)
|
||||
c.clock.step()
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
}
|
||||
}
|
||||
|
||||
it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
@@ -282,9 +374,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
it should "invalidate head being dequeued" in {
|
||||
// Exercises a single-entry queue: the only entry is invalidated while shifting
// is disabled, after which a fresh entry must be accepted and come out on deq
// one cycle later.
it should "work when invalidating and enqueueing to a depth=1 queue" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
// no dequeueing
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
|
||||
// prepare: fill the single entry with 0x12
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x12.U)
|
||||
c.clock.step()
|
||||
// invalidate the only entry (bit 0 of the mask), but don't allow shift
|
||||
c.io.allowShift.poke(false.B)
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x1.U)
|
||||
// TODO: we might be able to enqueue to a full depth=1 queue whose only
|
||||
// entry just got invalidated, so that enq.ready is true here, but
|
||||
// it is a niche case
|
||||
c.io.queue.enq.ready.expect(false.B)
|
||||
c.clock.step()
|
||||
// try enqueueing again now that there is space
|
||||
c.io.allowShift.poke(true.B)
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x34.U)
|
||||
// the invalidated 0x12 must not be visible on the dequeue side
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
c.clock.step()
|
||||
// see if it comes out right next cycle
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x34.U)
|
||||
}
|
||||
}
|
||||
|
||||
it should "invalidate head that is also being dequeued" in {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
@@ -300,12 +430,11 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
|
||||
// invalidate should work for the head just being dequeued at the same
|
||||
// cycle. However, it should not change deq.valid right away to avoid
|
||||
// combinational cycles (see definition).
|
||||
// cycle
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x1.U)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
c.clock.step()
|
||||
// 0x12 should have been dequeued
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
@@ -315,10 +444,12 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
it should "dequeue invalidated entries by itself" in {
|
||||
it should "dequeue invalidated head on its own when allowShift" in {
|
||||
test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
@@ -338,19 +469,33 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
// invalidate two entries at head
|
||||
c.io.invalidate.valid.poke(true.B)
|
||||
c.io.invalidate.bits.poke(0x3.U)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
// [ 0x56 | 0x34(inv) | 0x12(inv) ]
|
||||
c.clock.step()
|
||||
// [ 0x56 | 0x34(inv) ]
|
||||
// [ 0x56 | 0x34(inv) ]
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.clock.step()
|
||||
// [ 0x56 ]
|
||||
// [ 0x56 ]
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x56.U)
|
||||
c.clock.step()
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(false.B)
|
||||
c.clock.step()
|
||||
|
||||
// do one more enqueue-then-dequeue to see if used bit was properly cleared
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
c.io.queue.enq.ready.expect(true.B)
|
||||
c.io.queue.enq.valid.poke(true.B)
|
||||
c.io.queue.enq.bits.poke(0x78.U)
|
||||
c.clock.step()
|
||||
// should dequeue right away
|
||||
c.io.queue.enq.valid.poke(false.B)
|
||||
c.io.queue.deq.ready.poke(true.B)
|
||||
c.io.queue.deq.valid.expect(true.B)
|
||||
c.io.queue.deq.bits.expect(0x78.U)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,6 +503,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||
c.io.invalidate.valid.poke(false.B)
|
||||
c.io.invalidate.bits.poke(0.U)
|
||||
c.io.allowShift.poke(true.B)
|
||||
|
||||
// prepare
|
||||
c.io.queue.deq.ready.poke(false.B)
|
||||
@@ -383,24 +529,23 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
}
|
||||
}
|
||||
|
||||
object testConfig extends CoalescerConfig(
|
||||
maxSize = 5,
|
||||
object uncoalescerTestConfig extends CoalescerConfig(
|
||||
numLanes = 4,
|
||||
queueDepth = 2,
|
||||
waitTimeout = 8,
|
||||
addressWidth = 24,
|
||||
dataBusWidth = 5,
|
||||
numLanes = 4,
|
||||
// watermark = 2,
|
||||
wordSizeInBytes = 4,
|
||||
wordWidth = 2,
|
||||
numOldSrcIds = 16,
|
||||
numNewSrcIds = 4,
|
||||
respQueueDepth = 4,
|
||||
coalSizes = Seq(4, 5),
|
||||
coalLogSizes = Seq(4),
|
||||
sizeEnum = DefaultInFlightTableSizeEnum
|
||||
)
|
||||
|
||||
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "uncoalescer"
|
||||
val numLanes = 4
|
||||
val numPerLaneReqs = 2
|
||||
@@ -410,8 +555,8 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
val coalDataWidth = 128
|
||||
val numInflightCoalRequests = 4
|
||||
|
||||
it should "work" in {
|
||||
test(new UncoalescingUnit(testConfig))
|
||||
it should "work in general case" in {
|
||||
test(new Uncoalescer(uncoalescerTestConfig))
|
||||
// vcs helps with simulation time, but sometimes errors with
|
||||
// "mutation occurred during iteration" java error
|
||||
// .withAnnotations(Seq(VcsBackendAnnotation))
|
||||
@@ -426,7 +571,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
||||
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
|
||||
c.io.newEntry.lanes(0).reqs(1).offset.poke(0.U)
|
||||
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes
|
||||
c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B)
|
||||
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||
@@ -460,7 +605,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
// offset is counting from LSB
|
||||
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
|
||||
c.io.uncoalResps(0)(1).bits.data.expect(0xdeadbeefL.U)
|
||||
c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
|
||||
c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U)
|
||||
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
|
||||
@@ -468,6 +613,67 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
|
||||
}
|
||||
}
|
||||
|
||||
// Registers one coalesced request in which all four lanes have a single valid
// request at the same word offset (1), then feeds back one coalesced response
// and checks that every lane receives the same data word tagged with its own
// source id, while the second request slot of each lane stays invalid.
it should "uncoalesce when coalesced to the same word offset" in {
|
||||
test(new Uncoalescer(uncoalescerTestConfig))
|
||||
// .withAnnotations(Seq(VcsBackendAnnotation))
|
||||
{ c =>
|
||||
val sourceId = 0.U
|
||||
val four = c.io.newEntry.sizeEnumT.FOUR
|
||||
// record the new in-flight entry: reqs(0) valid at offset 1 in every lane,
|
||||
// reqs(1) unused
|
||||
c.io.coalReqValid.poke(true.B)
|
||||
c.io.newEntry.source.poke(sourceId)
|
||||
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
||||
c.io.newEntry.lanes(0).reqs(0).source.poke(0.U)
|
||||
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
||||
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B)
|
||||
c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B)
|
||||
c.io.newEntry.lanes(1).reqs(0).source.poke(1.U)
|
||||
c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U)
|
||||
c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B)
|
||||
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||
c.io.newEntry.lanes(2).reqs(0).source.poke(2.U)
|
||||
c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U)
|
||||
c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B)
|
||||
c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B)
|
||||
c.io.newEntry.lanes(3).reqs(0).source.poke(3.U)
|
||||
c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U)
|
||||
c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four)
|
||||
c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B)
|
||||
|
||||
c.clock.step()
|
||||
|
||||
c.io.coalReqValid.poke(false.B)
|
||||
|
||||
c.clock.step()
|
||||
|
||||
// drive the coalesced response for the entry recorded above
|
||||
c.io.coalResp.valid.poke(true.B)
|
||||
c.io.coalResp.bits.source.poke(sourceId)
|
||||
// 128-bit payload: upper 64 bits 0x0123456789abcdef, lower 64 bits
|
||||
// 0x5ca1ab1edeadbeef, so 32-bit word 1 (bits 63:32) is 0x5ca1ab1e
|
||||
val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL)
|
||||
c.io.coalResp.bits.data.poke(lit.U)
|
||||
|
||||
// table lookup is combinational at the same cycle
|
||||
// offset is counting from LSB
|
||||
c.io.uncoalResps(0)(0).valid.expect(true.B)
|
||||
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(0)(0).bits.source.expect(0.U)
|
||||
c.io.uncoalResps(0)(1).valid.expect(false.B)
|
||||
c.io.uncoalResps(1)(0).valid.expect(true.B)
|
||||
c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(1)(0).bits.source.expect(1.U)
|
||||
c.io.uncoalResps(1)(1).valid.expect(false.B)
|
||||
c.io.uncoalResps(2)(0).valid.expect(true.B)
|
||||
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
|
||||
c.io.uncoalResps(2)(1).valid.expect(false.B)
|
||||
c.io.uncoalResps(3)(0).valid.expect(true.B)
|
||||
c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||
c.io.uncoalResps(3)(0).bits.source.expect(3.U)
|
||||
c.io.uncoalResps(3)(1).valid.expect(false.B)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
|
||||
Reference in New Issue
Block a user