Merge branch 'graphics' of https://github.com/hansungk/rocket-chip into graphics
This commit is contained in:
@@ -36,29 +36,29 @@ object DefaultInFlightTableSizeEnum extends InFlightTableSizeEnum {
|
|||||||
}
|
}
|
||||||
|
|
||||||
case class CoalescerConfig(
|
case class CoalescerConfig(
|
||||||
numLanes: Int, // number of lanes (or threads) in a warp
|
numLanes: Int, // number of lanes (or threads) in a warp
|
||||||
maxSize: Int, // maximum burst size (64 bytes)
|
queueDepth: Int, // request window per lane
|
||||||
queueDepth: Int, // request window per lane
|
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
||||||
waitTimeout: Int, // max cycles to wait before forced fifo dequeue, per lane
|
addressWidth: Int, // assume <= 32
|
||||||
addressWidth: Int, // assume <= 32
|
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
||||||
dataBusWidth: Int, // memory-side downstream TileLink data bus size
|
// this has to be at least larger than the word size for
|
||||||
// this has to be at least larger than the word size for
|
// the coalescer to perform well
|
||||||
// the coalescer to perform well
|
// watermark = 2, // minimum buffer occupancy to start coalescing
|
||||||
// watermark = 2, // minimum buffer occupancy to start coalescing
|
wordSizeInBytes: Int, // 32-bit system
|
||||||
wordSizeInBytes: Int, // 32-bit system
|
wordWidth: Int, // log(WORD_SIZE)
|
||||||
wordWidth: Int, // log(WORD_SIZE)
|
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
||||||
numOldSrcIds: Int, // num of outstanding requests per lane, from processor
|
numNewSrcIds: Int, // num of outstanding coalesced requests
|
||||||
numNewSrcIds: Int, // num of outstanding coalesced requests
|
respQueueDepth: Int, // depth of the response fifo queues
|
||||||
respQueueDepth: Int, // depth of the response fifo queues
|
coalLogSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
||||||
coalSizes: Seq[Int], // list of coalescer sizes to try in the MonoCoalescers
|
// each size is log(byteSize)
|
||||||
// must be power of 2's
|
sizeEnum: InFlightTableSizeEnum,
|
||||||
sizeEnum: InFlightTableSizeEnum
|
) {
|
||||||
)
|
// maximum coalesced size
|
||||||
|
def maxCoalLogSize: Int = coalLogSizes.max
|
||||||
|
}
|
||||||
|
|
||||||
object defaultConfig extends CoalescerConfig(
|
object defaultConfig extends CoalescerConfig(
|
||||||
numLanes = 4,
|
numLanes = 4,
|
||||||
// TODO: bigger size
|
|
||||||
maxSize = 3,
|
|
||||||
queueDepth = 1,
|
queueDepth = 1,
|
||||||
waitTimeout = 8,
|
waitTimeout = 8,
|
||||||
addressWidth = 24,
|
addressWidth = 24,
|
||||||
@@ -69,7 +69,7 @@ object defaultConfig extends CoalescerConfig(
|
|||||||
numOldSrcIds = 16,
|
numOldSrcIds = 16,
|
||||||
numNewSrcIds = 4,
|
numNewSrcIds = 4,
|
||||||
respQueueDepth = 4,
|
respQueueDepth = 4,
|
||||||
coalSizes = Seq(3),
|
coalLogSizes = Seq(3),
|
||||||
sizeEnum = DefaultInFlightTableSizeEnum
|
sizeEnum = DefaultInFlightTableSizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -153,10 +153,14 @@ class ReqSourceGen(sourceWidth: Int) extends Module {
|
|||||||
// A shift-register queue implementation that supports invalidating entries
|
// A shift-register queue implementation that supports invalidating entries
|
||||||
// and exposing queue contents as output IO. (TODO: support deadline)
|
// and exposing queue contents as output IO. (TODO: support deadline)
|
||||||
// Initially copied from freechips.rocketchip.util.ShiftQueue.
|
// Initially copied from freechips.rocketchip.util.ShiftQueue.
|
||||||
// If `pipe` is true, support enqueueing to a full queue when also dequeueing.
|
// The queue only shifts down when `allowShift` is given true. Dequeueing
|
||||||
|
// works normally, but if allowShift was false, the queue head will stay
|
||||||
|
// invalid after dequeueing. This option is added in order to synchronize the
|
||||||
|
// shifting of the queues between lanes to model the SIMD behavior.
|
||||||
|
// If `pipe` is true, support enqueueing to a full queue when head is being
|
||||||
|
// dequeued at the next cycle.
|
||||||
// Software model: window.py
|
// Software model: window.py
|
||||||
class CoalShiftQueue[T <: Data](
|
class CoalShiftQueue[T <: Data]( gen: T,
|
||||||
gen: T,
|
|
||||||
val entries: Int,
|
val entries: Int,
|
||||||
pipe: Boolean = true,
|
pipe: Boolean = true,
|
||||||
flow: Boolean = false
|
flow: Boolean = false
|
||||||
@@ -164,6 +168,7 @@ class CoalShiftQueue[T <: Data](
|
|||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val queue = new QueueIO(gen, entries)
|
val queue = new QueueIO(gen, entries)
|
||||||
val invalidate = Input(Valid(UInt(entries.W)))
|
val invalidate = Input(Valid(UInt(entries.W)))
|
||||||
|
val allowShift = Input(Bool())
|
||||||
val mask = Output(UInt(entries.W))
|
val mask = Output(UInt(entries.W))
|
||||||
val elts = Output(Vec(entries, gen))
|
val elts = Output(Vec(entries, gen))
|
||||||
// 'QueueIO' provides io.count, but we might not want to use it in the
|
// 'QueueIO' provides io.count, but we might not want to use it in the
|
||||||
@@ -192,7 +197,7 @@ class CoalShiftQueue[T <: Data](
|
|||||||
def paddedUsed = pad({ i: Int => used(i) })
|
def paddedUsed = pad({ i: Int => used(i) })
|
||||||
def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i))
|
def validAfterInv(i: Int) = valid(i) && (!io.invalidate.valid || !io.invalidate.bits(i))
|
||||||
|
|
||||||
val shift = (used =/= 0.U) && (io.queue.deq.ready || !validAfterInv(0))
|
val shift = io.allowShift && (used =/= 0.U) && (io.queue.deq.fire || !validAfterInv(0))
|
||||||
for (i <- 0 until entries) {
|
for (i <- 0 until entries) {
|
||||||
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
|
val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
|
||||||
val wen = Mux(
|
val wen = Mux(
|
||||||
@@ -208,27 +213,28 @@ class CoalShiftQueue[T <: Data](
|
|||||||
(io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
|
(io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
|
||||||
(io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i)
|
(io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i)
|
||||||
)
|
)
|
||||||
|
// additionally, head entry should get invalidated when dequeue fired
|
||||||
|
// but queue didn't shift (e.g. because allowShift was false)
|
||||||
|
when (io.queue.deq.fire && !shift) {
|
||||||
|
valid(0) := false.B
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
when(io.queue.enq.fire) {
|
when(io.queue.enq.fire) {
|
||||||
when(!io.queue.deq.fire) {
|
when(!shift) {
|
||||||
used := (used << 1.U) | 1.U
|
used := (used << 1.U) | 1.U
|
||||||
}
|
}
|
||||||
}.elsewhen(io.queue.deq.fire) {
|
}.elsewhen(shift) {
|
||||||
used := used >> 1.U
|
used := used >> 1.U
|
||||||
}
|
}
|
||||||
|
|
||||||
io.queue.enq.ready := !valid(entries - 1)
|
io.queue.enq.ready := !valid(entries - 1)
|
||||||
// We don't want to invalidate deq.valid response right away even when
|
io.queue.deq.valid := validAfterInv(0)
|
||||||
// io.invalidate(head) is true.
|
|
||||||
// Coalescing unit consumes queue head's validity, and produces its new
|
|
||||||
// validity. Deasserting deq.valid right away will result in a combinational
|
|
||||||
// cycle.
|
|
||||||
io.queue.deq.valid := valid(0)
|
|
||||||
io.queue.deq.bits := elts.head
|
io.queue.deq.bits := elts.head
|
||||||
|
|
||||||
assert(!flow, "flow-through is not implemented")
|
assert(!flow, "flow-through is not implemented")
|
||||||
if (flow) {
|
if (flow) {
|
||||||
|
// FIXME old code
|
||||||
when(io.queue.enq.valid) { io.queue.deq.valid := true.B }
|
when(io.queue.enq.valid) { io.queue.deq.valid := true.B }
|
||||||
when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits }
|
when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits }
|
||||||
}
|
}
|
||||||
@@ -243,7 +249,7 @@ class CoalShiftQueue[T <: Data](
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Software model: coalescer.py
|
// Software model: coalescer.py
|
||||||
class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
||||||
config: CoalescerConfig) extends Module {
|
config: CoalescerConfig) extends Module {
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||||
@@ -251,8 +257,10 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
val leaderIdx = Output(UInt(log2Ceil(config.numLanes).W))
|
||||||
val baseAddr = Output(UInt(config.addressWidth.W))
|
val baseAddr = Output(UInt(config.addressWidth.W))
|
||||||
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
|
val matchOH = Output(Vec(config.numLanes, UInt(config.queueDepth.W)))
|
||||||
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth).W))
|
// number of entries matched with this leader lane's head.
|
||||||
val coverageHits = Output(UInt((1 << config.maxSize).W))
|
// maximum is numLanes * queueDepth
|
||||||
|
val matchCount = Output(UInt(log2Ceil(config.numLanes * config.queueDepth + 1).W))
|
||||||
|
val coverageHits = Output(UInt((1 << config.maxCoalLogSize).W))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -277,14 +285,12 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
leadersValid(i), head.source, head.address)
|
leadersValid(i), head.source, head.address)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// debug assertions and prints
|
|
||||||
when (leadersValid.reduce(_ || _)) {
|
when (leadersValid.reduce(_ || _)) {
|
||||||
assert(testNoQueueDrift, "unexpected drift between lane request queues")
|
assert(testNoQueueDrift, "unexpected drift between lane request queues")
|
||||||
printQueueHeads
|
// printQueueHeads
|
||||||
}
|
}
|
||||||
|
|
||||||
val size = coalSize
|
val size = coalLogSize
|
||||||
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
val addrMask = (((1 << config.addressWidth) - 1) - ((1 << size) - 1)).U
|
||||||
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
def canMatch(req0: ReqQueueEntry, req0v: Bool, req1: ReqQueueEntry, req1v: Bool): Bool = {
|
||||||
(req0.op === req1.op) &&
|
(req0.op === req1.op) &&
|
||||||
@@ -294,18 +300,24 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
|
|
||||||
// Gives a 2-D table of Bools representing match at every queue entry,
|
// Gives a 2-D table of Bools representing match at every queue entry,
|
||||||
// for each lane (so 3-D in total).
|
// for each lane (so 3-D in total).
|
||||||
val matchTablePerLane = (leaders zip leadersValid).map { case (leader, leaderValid) =>
|
val matchTablePerLane = (leaders zip leadersValid).zipWithIndex
|
||||||
// TODO: match leader to only lanes >= leader idx
|
.map { case ((leader, leaderValid), leaderIndex) =>
|
||||||
io.window.map { followerLane =>
|
io.window.zipWithIndex.map { case (followerQueue, followerIndex) =>
|
||||||
// compare leader's head against follower's every queue entry
|
// compare leader's head against follower's every queue entry
|
||||||
(followerLane.elts zip followerLane.mask.asBools).map { case (follower, followerValid) =>
|
(followerQueue.elts zip followerQueue.mask.asBools)
|
||||||
canMatch(follower, followerValid, leader, leaderValid)
|
.map { case (follower, followerValid) =>
|
||||||
|
// match leader to only followers at lanes >= leader idx
|
||||||
|
// this halves the number of comparators
|
||||||
|
if (followerIndex < leaderIndex) false.B
|
||||||
|
else canMatch(follower, followerValid, leader, leaderValid)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: potentially expensive: popcount & adder
|
// TODO: potentially expensive: popcount & adder
|
||||||
val matchCounts = matchTablePerLane.map(leader => leader.map(PopCount(_)).reduce(_ +& _))
|
val matchCounts = matchTablePerLane.map(table =>
|
||||||
|
table.map(PopCount(_)) // sum up each column
|
||||||
|
.reduce(_ +& _))
|
||||||
val canCoalesce = matchCounts.map(_ > 1.U)
|
val canCoalesce = matchCounts.map(_ > 1.U)
|
||||||
|
|
||||||
// TODO: potentially expensive
|
// TODO: potentially expensive
|
||||||
@@ -323,6 +335,18 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
})(chosenLeaderIdx)
|
})(chosenLeaderIdx)
|
||||||
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
val chosenMatchCount = VecInit(matchCounts)(chosenLeaderIdx)
|
||||||
|
|
||||||
|
// coverage calculation
|
||||||
|
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
||||||
|
// 2-D table flattened to 1-D
|
||||||
|
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
||||||
|
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
||||||
|
// indicates whether each word in the coalesced chunk is accessed by any of the
|
||||||
|
// queue entries. e.g. if [ 1 1 1 1 ], all of the four words in the coalesced
|
||||||
|
// data has been accessed and we've reached 100% utilization.
|
||||||
|
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
||||||
|
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
||||||
|
}
|
||||||
|
|
||||||
// debug prints
|
// debug prints
|
||||||
when (leadersValid.reduce(_ || _)) {
|
when (leadersValid.reduce(_ || _)) {
|
||||||
matchCounts.zipWithIndex.foreach { case (count, i) =>
|
matchCounts.zipWithIndex.foreach { case (count, i) =>
|
||||||
@@ -334,14 +358,13 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
printf("%d ", m)
|
printf("%d ", m)
|
||||||
}
|
}
|
||||||
printf("]\n")
|
printf("]\n")
|
||||||
}
|
printf("chosenMatchCount = %d\n", chosenMatchCount)
|
||||||
|
|
||||||
// coverage calculation
|
printf("hits = [ ")
|
||||||
def getOffsetSlice(addr: UInt) = addr(size - 1, config.wordWidth)
|
hits.foreach { m =>
|
||||||
val offsets = io.window.map(_.elts).flatMap(_.map(req => getOffsetSlice(req.address)))
|
printf("%d ", m)
|
||||||
val valids = io.window.map(_.mask).flatMap(_.asBools)
|
}
|
||||||
val hits = Seq.tabulate(1 << (size - config.wordWidth)) { target =>
|
printf("]\n")
|
||||||
(offsets zip valids).map { case (offset, valid) => valid && (offset === target.U) }.reduce(_ || _)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
io.results.leaderIdx := chosenLeaderIdx
|
io.results.leaderIdx := chosenLeaderIdx
|
||||||
@@ -354,19 +377,21 @@ class MonoCoalescer(coalSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
|
|||||||
// Software model: coalescer.py
|
// Software model: coalescer.py
|
||||||
class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry,
|
class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry,
|
||||||
config: CoalescerConfig) extends Module {
|
config: CoalescerConfig) extends Module {
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
|
// coalescing window, connected to the contents of the request queues
|
||||||
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
val window = Input(Vec(config.numLanes, windowT.io.cloneType))
|
||||||
val outReq = DecoupledIO(coalReqT.cloneType)
|
// generated coalesced request
|
||||||
|
val coalReq = DecoupledIO(coalReqT.cloneType)
|
||||||
|
// invalidate signals going into each request queue's head
|
||||||
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
|
val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
|
||||||
})
|
})
|
||||||
|
|
||||||
val coalescers = config.coalSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
val coalescers = config.coalLogSizes.map(size => Module(new MonoCoalescer(size, windowT, config)))
|
||||||
coalescers.foreach(_.io.window := io.window)
|
coalescers.foreach(_.io.window := io.window)
|
||||||
|
|
||||||
def normalize(x: Seq[UInt]): Seq[UInt] = {
|
def normalize(valPerSize: Seq[UInt]): Seq[UInt] = {
|
||||||
x.zip(config.coalSizes).map { case (hits, size) =>
|
(valPerSize zip config.coalLogSizes).map { case (hits, size) =>
|
||||||
(hits << (config.maxSize - size).U).asUInt
|
(hits << (config.maxCoalLogSize - size).U).asUInt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -378,27 +403,40 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
}._2
|
}._2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalize to maximum coalescing size so that we can do fair comparisons
|
||||||
|
// between coalescing results of different sizes
|
||||||
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
|
val normalizedMatches = normalize(coalescers.map(_.io.results.matchCount))
|
||||||
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
|
val normalizedHits = normalize(coalescers.map(_.io.results.coverageHits))
|
||||||
|
|
||||||
val chosenIdx = Wire(UInt(log2Ceil(config.coalSizes.size).W))
|
val chosenSizeIdx = Wire(UInt(log2Ceil(config.coalLogSizes.size).W))
|
||||||
val chosenValid = Wire(Bool())
|
val chosenValid = Wire(Bool())
|
||||||
// minimum 25% coverage
|
// minimum 25% coverage
|
||||||
val minCoverage = 1.max(1 << (config.maxSize - 4))
|
val minCoverage = 1.max(1 << ((config.maxCoalLogSize - 2) - 2))
|
||||||
|
|
||||||
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
|
when (normalizedHits.map(_ > minCoverage.U).reduce(_ || _)) {
|
||||||
chosenIdx := argMax(normalizedHits)
|
chosenSizeIdx := argMax(normalizedHits)
|
||||||
chosenValid := true.B
|
chosenValid := true.B
|
||||||
|
printf("coalescing success by coverage policy\n")
|
||||||
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
|
}.elsewhen(normalizedMatches.map(_ > 1.U).reduce(_ || _)) {
|
||||||
chosenIdx := argMax(normalizedMatches)
|
chosenSizeIdx := argMax(normalizedMatches)
|
||||||
chosenValid := true.B
|
chosenValid := true.B
|
||||||
|
printf("coalescing success by matches policy\n")
|
||||||
}.otherwise {
|
}.otherwise {
|
||||||
chosenIdx := DontCare
|
chosenSizeIdx := DontCare
|
||||||
chosenValid := false.B
|
chosenValid := false.B
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def debugPolicyPrint() = {
|
||||||
|
printf("matchCount[0]=%d\n", coalescers(0).io.results.matchCount)
|
||||||
|
printf("normalizedMatches[0]=%d\n", normalizedMatches(0))
|
||||||
|
printf("coverageHits[0]=%d\n", coalescers(0).io.results.coverageHits)
|
||||||
|
printf("normalizedHits[0]=%d\n", normalizedHits(0))
|
||||||
|
printf("minCoverage=%d\n", minCoverage.U)
|
||||||
|
}
|
||||||
|
|
||||||
// create coalesced request
|
// create coalesced request
|
||||||
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenIdx)
|
val chosenBundle = VecInit(coalescers.map(_.io.results))(chosenSizeIdx)
|
||||||
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenIdx)
|
val chosenSize = VecInit(coalescers.map(_.size.U))(chosenSizeIdx)
|
||||||
|
|
||||||
// flatten requests and matches
|
// flatten requests and matches
|
||||||
val flatReqs = io.window.flatMap(_.elts)
|
val flatReqs = io.window.flatMap(_.elts)
|
||||||
@@ -411,8 +449,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
|
|
||||||
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
|
// note: this is word-level coalescing. if finer granularity is needed, need to modify code
|
||||||
val numWords = (1.U << (chosenSize - config.wordWidth.U)).asUInt
|
val numWords = (1.U << (chosenSize - config.wordWidth.U)).asUInt
|
||||||
val maxWords = 1 << (config.maxSize - config.wordWidth)
|
val maxWords = 1 << (config.maxCoalLogSize - config.wordWidth)
|
||||||
val addrMask = Wire(UInt(config.maxSize.W))
|
val addrMask = Wire(UInt(config.maxCoalLogSize.W))
|
||||||
addrMask := (1.U << chosenSize).asUInt - 1.U
|
addrMask := (1.U << chosenSize).asUInt - 1.U
|
||||||
|
|
||||||
val data = Wire(Vec(maxWords, UInt((config.wordSizeInBytes * 8).W)))
|
val data = Wire(Vec(maxWords, UInt((config.wordSizeInBytes * 8).W)))
|
||||||
@@ -420,7 +458,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
|
|
||||||
for (i <- 0 until maxWords) {
|
for (i <- 0 until maxWords) {
|
||||||
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
|
val sel = flatReqs.zip(flatMatches).map { case (req, m) =>
|
||||||
m && ((req.address(config.maxSize - 1, 0) & addrMask) === i.U)
|
m && ((req.address(config.maxCoalLogSize - 1, 0) & addrMask) === i.U)
|
||||||
}
|
}
|
||||||
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
|
// TODO: SW uses priority encoder, not sure about behavior of MuxCase
|
||||||
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
|
data(i) := MuxCase(DontCare, flatReqs.zip(sel).map { case (req, s) =>
|
||||||
@@ -435,18 +473,20 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
}
|
}
|
||||||
|
|
||||||
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
||||||
sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
|
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
|
||||||
|
|
||||||
io.outReq.bits.source := sourceGen.io.id.bits
|
val coalesceValid = chosenValid && sourceGen.io.id.valid
|
||||||
io.outReq.bits.mask := mask.asUInt
|
|
||||||
io.outReq.bits.data := data.asUInt
|
io.coalReq.bits.source := sourceGen.io.id.bits
|
||||||
io.outReq.bits.size := chosenSize
|
io.coalReq.bits.mask := mask.asUInt
|
||||||
io.outReq.bits.address := chosenBundle.baseAddr
|
io.coalReq.bits.data := data.asUInt
|
||||||
io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
|
io.coalReq.bits.size := chosenSize
|
||||||
io.outReq.valid := chosenValid && sourceGen.io.id.valid
|
io.coalReq.bits.address := chosenBundle.baseAddr
|
||||||
|
io.coalReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
|
||||||
|
io.coalReq.valid := coalesceValid
|
||||||
|
|
||||||
io.invalidate.bits := chosenBundle.matchOH
|
io.invalidate.bits := chosenBundle.matchOH
|
||||||
io.invalidate.valid := io.outReq.fire // invalidate only when fire
|
io.invalidate.valid := io.coalReq.fire // invalidate only when fire
|
||||||
|
|
||||||
dontTouch(io.invalidate) // debug
|
dontTouch(io.invalidate) // debug
|
||||||
|
|
||||||
@@ -471,7 +511,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth))
|
Module(new CoalShiftQueue(reqQueueEntryT, config.queueDepth))
|
||||||
}
|
}
|
||||||
|
|
||||||
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.addressWidth, config.maxSize)
|
val coalReqT = new ReqQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.addressWidth, config.maxCoalLogSize)
|
||||||
val coalescer = Module(new MultiCoalescer(reqQueues.head, coalReqT, config))
|
val coalescer = Module(new MultiCoalescer(reqQueues.head, coalReqT, config))
|
||||||
coalescer.io.window := reqQueues.map(_.io)
|
coalescer.io.window := reqQueues.map(_.io)
|
||||||
|
|
||||||
@@ -511,20 +551,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready")
|
assert(reqQueue.io.queue.enq.ready, "reqQueue is supposed to be always ready")
|
||||||
reqQueue.io.queue.enq.valid := tlIn.a.valid
|
reqQueue.io.queue.enq.valid := tlIn.a.valid
|
||||||
reqQueue.io.queue.enq.bits := req
|
reqQueue.io.queue.enq.bits := req
|
||||||
// TODO: deq.ready should respect downstream ready
|
// TODO: deq.ready should respect downstream arbiter
|
||||||
reqQueue.io.queue.deq.ready := true.B
|
reqQueue.io.queue.deq.ready := true.B
|
||||||
|
// invalidate queue entries that contain original core requests that got
|
||||||
|
// coalesced into a wider one
|
||||||
reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
|
reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
|
||||||
reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
|
reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
|
||||||
|
reqQueue.io.allowShift := true.B
|
||||||
|
|
||||||
|
// NOTE: this relies on CoalShiftQueue's behavior combinationally
|
||||||
|
// deasserting deq.valid in the same cycle that the head invalidate
|
||||||
|
// signal goes up.
|
||||||
tlOut.a.valid := reqQueue.io.queue.deq.valid
|
tlOut.a.valid := reqQueue.io.queue.deq.valid
|
||||||
tlOut.a.bits := reqQueue.io.queue.deq.bits.toTLA(edgeOut)
|
tlOut.a.bits := reqQueue.io.queue.deq.bits.toTLA(edgeOut)
|
||||||
}
|
}
|
||||||
|
|
||||||
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
|
||||||
|
|
||||||
tlCoal.a.valid := coalescer.io.outReq.valid
|
tlCoal.a.valid := coalescer.io.coalReq.valid
|
||||||
tlCoal.a.bits := coalescer.io.outReq.bits.toTLA(edgeCoal)
|
tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
|
||||||
coalescer.io.outReq.ready := tlCoal.a.ready
|
coalescer.io.coalReq.ready := tlCoal.a.ready
|
||||||
tlCoal.b.ready := true.B
|
tlCoal.b.ready := true.B
|
||||||
tlCoal.c.valid := false.B
|
tlCoal.c.valid := false.B
|
||||||
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
// tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
|
||||||
@@ -541,7 +587,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
// coalesced request. Upper bound is min(DEPTH, 2**sourceWidth).
|
||||||
val numPerLaneReqs = config.queueDepth
|
val numPerLaneReqs = config.queueDepth
|
||||||
|
|
||||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxSize), config.maxSize)
|
val respQueueEntryT = new RespQueueEntry(sourceWidth, log2Ceil(config.maxCoalLogSize), config.maxCoalLogSize)
|
||||||
val respQueues = Seq.tabulate(config.numLanes) { _ =>
|
val respQueues = Seq.tabulate(config.numLanes) { _ =>
|
||||||
Module(
|
Module(
|
||||||
new MultiPortQueue(
|
new MultiPortQueue(
|
||||||
@@ -550,6 +596,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
// requests that didn't get coalesced, and M is the maximum number of
|
// requests that didn't get coalesced, and M is the maximum number of
|
||||||
// single-lane requests that can go into a coalesced request.
|
// single-lane requests that can go into a coalesced request.
|
||||||
// (`numPerLaneReqs`).
|
// (`numPerLaneReqs`).
|
||||||
|
// TODO: potentially expensive, because this generates more FFs.
|
||||||
|
// Rather than enqueueing all responses in a single cycle, consider
|
||||||
|
// enqueueing one by one (at the cost of possibly stalling downstream).
|
||||||
1 + numPerLaneReqs,
|
1 + numPerLaneReqs,
|
||||||
// deq_lanes = 1 because we're serializing all responses to 1 port that
|
// deq_lanes = 1 because we're serializing all responses to 1 port that
|
||||||
// goes back to the core.
|
// goes back to the core.
|
||||||
@@ -566,7 +615,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
val respQueueNoncoalPort = 0
|
val respQueueNoncoalPort = 0
|
||||||
val respQueueCoalPortOffset = 1
|
val respQueueUncoalPortOffset = 1
|
||||||
|
|
||||||
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
|
(outer.node.in zip outer.node.out).zipWithIndex.foreach {
|
||||||
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
|
case (((tlIn, edgeIn), (tlOut, _)), 0) => // TODO: not necessarily 1 master edge
|
||||||
@@ -645,51 +694,40 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
|
// logic to generate the Inflight Entry into the uncoalescer, where it should be.
|
||||||
// this also reduces top level clutter.
|
// this also reduces top level clutter.
|
||||||
|
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val uncoalescer = Module(new Uncoalescer(config))
|
||||||
// but the width of the size enum
|
|
||||||
val newEntry = Wire(
|
val newEntry = Wire(uncoalescer.inflightTable.entryT)
|
||||||
new InflightCoalReqTableEntry(
|
newEntry.source := coalescer.io.coalReq.bits.source
|
||||||
config.numLanes,
|
|
||||||
numPerLaneReqs,
|
|
||||||
sourceWidth,
|
|
||||||
offsetBits,
|
|
||||||
config.sizeEnum
|
|
||||||
)
|
|
||||||
)
|
|
||||||
println(s"=========== table sourceWidth: ${sourceWidth}")
|
|
||||||
// println(s"=========== table sizeEnumBits: ${newEntry.sizeEnumBits}")
|
|
||||||
newEntry.source := coalescer.io.outReq.bits.source
|
|
||||||
|
|
||||||
// TODO: richard to write table fill logic
|
// TODO: richard to write table fill logic
|
||||||
// FIXME: this assertion used to say 1 << config.MAX_SIZE
|
assert (config.maxCoalLogSize <= config.dataBusWidth,
|
||||||
// I changed this to say DATA BUS SIZE. We need another assertion
|
"multi-beat coalesced reads/writes are currently not supported")
|
||||||
// to assert that MAX_SIZE is <= DATA_BUS_SIZE because we do not support
|
assert (
|
||||||
// multi-beat writes currently
|
|
||||||
assert(
|
|
||||||
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
|
tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
|
||||||
s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant"
|
s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
|
||||||
+ s" (${(1 << config.dataBusWidth) * 8})"
|
+ s" (${(1 << config.dataBusWidth) * 8})"
|
||||||
)
|
)
|
||||||
val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
|
val reqQueueHeads = reqQueues.map(q => q.io.queue.deq.bits)
|
||||||
newEntry.lanes.foreach { l =>
|
// Do a 2-D copy from every (numLanes * queueDepth) invalidate output of the
|
||||||
l.reqs.zipWithIndex.foreach { case (r, i) =>
|
// coalescer to every (numLanes * queueDepth) entry in the inflight table.
|
||||||
// TODO: this part needs the actual coalescing logic to work
|
(newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex
|
||||||
r.valid := false.B
|
.foreach { case ((laneEntry, laneInv), lane) =>
|
||||||
r.source := origReqs(i).source
|
(laneEntry.reqs zip laneInv.asBools).zipWithIndex
|
||||||
r.offset := (origReqs(i).address % (1 << config.maxSize).U) >> config.wordWidth
|
.foreach { case ((reqEntry, inv), i) =>
|
||||||
r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size)
|
val req = reqQueues(lane).io.elts(i)
|
||||||
|
when ((coalescer.io.invalidate.valid && inv)) {
|
||||||
|
printf(s"coalescer: reqQueue(${lane})(${i}) got invalidated (source=%d)\n", req.source)
|
||||||
|
}
|
||||||
|
reqEntry.valid := (coalescer.io.invalidate.valid && inv)
|
||||||
|
reqEntry.source := req.source
|
||||||
|
reqEntry.offset := ((req.address % (1 << config.maxCoalLogSize).U) >> config.wordWidth)
|
||||||
|
reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(req.size)
|
||||||
|
// TODO: load/store op
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
newEntry.lanes(0).reqs(0).valid := true.B
|
|
||||||
newEntry.lanes(1).reqs(0).valid := true.B
|
|
||||||
newEntry.lanes(2).reqs(0).valid := true.B
|
|
||||||
newEntry.lanes(3).reqs(0).valid := true.B
|
|
||||||
dontTouch(newEntry)
|
dontTouch(newEntry)
|
||||||
|
|
||||||
// Uncoalescer module uncoalesces responses back to each lane
|
uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
|
||||||
val uncoalescer = Module(new UncoalescingUnit(config))
|
|
||||||
|
|
||||||
uncoalescer.io.coalReqValid := coalescer.io.outReq.valid
|
|
||||||
uncoalescer.io.newEntry := newEntry
|
uncoalescer.io.newEntry := newEntry
|
||||||
// Cleanup: custom <>?
|
// Cleanup: custom <>?
|
||||||
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
uncoalescer.io.coalResp.valid := tlCoal.d.valid
|
||||||
@@ -698,22 +736,26 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
tlCoal.d.ready := uncoalescer.io.coalResp.ready
|
||||||
|
|
||||||
// Queue up synthesized uncoalesced responses into each lane's response queue
|
// Queue up synthesized uncoalesced responses into each lane's response queue
|
||||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
(respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) =>
|
||||||
lanes.zipWithIndex.foreach { case (resp, i) =>
|
perLaneResps.zipWithIndex.foreach { case (resp, i) =>
|
||||||
// TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
|
// TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
|
||||||
// cache. This should ideally not happen though.
|
// cache. This should ideally not happen though.
|
||||||
assert(
|
assert(
|
||||||
q.io.enq(respQueueCoalPortOffset + i).ready,
|
q.io.enq(respQueueUncoalPortOffset + i).ready,
|
||||||
s"respQueue: enq port for 0-th coalesced response is blocked"
|
s"respQueue: enq port for ${i}-th uncoalesced response is blocked for lane ${lane}"
|
||||||
)
|
)
|
||||||
q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
|
q.io.enq(respQueueUncoalPortOffset + i).valid := resp.valid
|
||||||
q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
|
q.io.enq(respQueueUncoalPortOffset + i).bits := resp.bits
|
||||||
|
// debug
|
||||||
|
// when (resp.valid) {
|
||||||
|
// printf(s"${i}-th uncoalesced response came back from lane ${lane}\n")
|
||||||
|
// }
|
||||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug
|
// Debug
|
||||||
dontTouch(coalescer.io.outReq)
|
dontTouch(coalescer.io.coalReq)
|
||||||
val coalRespData = tlCoal.d.bits.data
|
val coalRespData = tlCoal.d.bits.data
|
||||||
dontTouch(coalRespData)
|
dontTouch(coalRespData)
|
||||||
|
|
||||||
@@ -730,10 +772,10 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
|
|||||||
// FIXME: overlaps with RespQueueEntry. Trait-ify
|
// FIXME: overlaps with RespQueueEntry. Trait-ify
|
||||||
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
|
class CoalescedResponseBundle(config: CoalescerConfig) extends Bundle {
|
||||||
val source = UInt(log2Ceil(config.numNewSrcIds).W)
|
val source = UInt(log2Ceil(config.numNewSrcIds).W)
|
||||||
val data = UInt((8 * (1 << config.maxSize)).W)
|
val data = UInt((8 * (1 << config.maxCoalLogSize)).W)
|
||||||
}
|
}
|
||||||
|
|
||||||
class UncoalescingUnit(config: CoalescerConfig) extends Module {
|
class Uncoalescer(config: CoalescerConfig) extends Module {
|
||||||
// notes to hansung:
|
// notes to hansung:
|
||||||
// val numLanes: Int, <-> config.NUM_LANES
|
// val numLanes: Int, <-> config.NUM_LANES
|
||||||
// val numPerLaneReqs: Int, <-> config.DEPTH
|
// val numPerLaneReqs: Int, <-> config.DEPTH
|
||||||
@@ -833,19 +875,21 @@ class UncoalescingUnit(config: CoalescerConfig) extends Module {
|
|||||||
// split the coalesced response back to individual per-lane responses with the
|
// split the coalesced response back to individual per-lane responses with the
|
||||||
// right metadata.
|
// right metadata.
|
||||||
class InflightCoalReqTable(config: CoalescerConfig) extends Module {
|
class InflightCoalReqTable(config: CoalescerConfig) extends Module {
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = config.maxCoalLogSize - config.wordWidth // assumes word offset
|
||||||
val sizeBits = 2 // FIXME hardcoded
|
|
||||||
val entryT = new InflightCoalReqTableEntry(
|
val entryT = new InflightCoalReqTableEntry(
|
||||||
config.numLanes,
|
config.numLanes,
|
||||||
config.queueDepth,
|
config.queueDepth,
|
||||||
log2Ceil(config.numOldSrcIds),
|
log2Ceil(config.numOldSrcIds),
|
||||||
config.maxSize,
|
config.maxCoalLogSize,
|
||||||
config.sizeEnum
|
config.sizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
val entries = config.numNewSrcIds
|
val entries = config.numNewSrcIds
|
||||||
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
val sourceWidth = log2Ceil(config.numOldSrcIds)
|
||||||
|
|
||||||
|
println(s"=========== table sourceWidth: ${sourceWidth}")
|
||||||
|
println(s"=========== table sizeEnumBits: ${entryT.sizeEnumT.getWidth}")
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val enq = Flipped(Decoupled(entryT))
|
val enq = Flipped(Decoupled(entryT))
|
||||||
// TODO: return actual stuff
|
// TODO: return actual stuff
|
||||||
|
|||||||
@@ -35,26 +35,46 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
|
|
||||||
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
class DummyCoalescingUnitTB(implicit p: Parameters) extends LazyModule {
|
||||||
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
|
val cpuNodes = Seq.tabulate(testConfig.numLanes) { _ =>
|
||||||
TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLClientParameters(
|
TLClientNode(
|
||||||
name = "processor-nodes",
|
Seq(
|
||||||
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
TLMasterPortParameters.v1(
|
||||||
// requestFifo = true,
|
Seq(
|
||||||
visibility = Seq(AddressSet(0x0, 0xffffff))))))) // 24 bit address space (TODO probably use testConfig)
|
TLClientParameters(
|
||||||
|
name = "processor-nodes",
|
||||||
|
sourceId = IdRange(0, testConfig.numOldSrcIds),
|
||||||
|
visibility = Seq(AddressSet(0x0, 0xffffff))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
) // 24 bit address space (TODO probably use testConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
val device = new SimpleDevice("dummy", Seq("dummy"))
|
val device = new SimpleDevice("dummy", Seq("dummy"))
|
||||||
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
|
val beatBytes = 1 << testConfig.dataBusWidth // 256 bit bus
|
||||||
val l2Nodes = Seq.tabulate(5) { _ =>
|
val l2Nodes = Seq.tabulate(5) { _ =>
|
||||||
TLManagerNode(Seq(TLSlavePortParameters.v1(Seq(TLManagerParameters(
|
TLManagerNode(
|
||||||
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
Seq(
|
||||||
resources = device.reg,
|
TLSlavePortParameters.v1(
|
||||||
regionType = RegionType.UNCACHED,
|
Seq(
|
||||||
executable = true,
|
TLManagerParameters(
|
||||||
supportsGet = TransferSizes(1, beatBytes),
|
address = Seq(AddressSet(0x0, 0xffffff)), // should be matching cpuNode
|
||||||
supportsPutFull = TransferSizes(1, beatBytes),
|
resources = device.reg,
|
||||||
supportsPutPartial = TransferSizes(1, beatBytes),
|
regionType = RegionType.UNCACHED,
|
||||||
supportsHint = TransferSizes(1, beatBytes),
|
executable = true,
|
||||||
fifoId = Some(0))), beatBytes)))
|
supportsArithmetic = TransferSizes(1, beatBytes),
|
||||||
|
supportsLogical = TransferSizes(1, beatBytes),
|
||||||
|
supportsGet = TransferSizes(1, beatBytes),
|
||||||
|
supportsPutFull = TransferSizes(1, beatBytes),
|
||||||
|
supportsPutPartial = TransferSizes(1, beatBytes),
|
||||||
|
supportsHint = TransferSizes(1, beatBytes),
|
||||||
|
fifoId = Some(0)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
beatBytes
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val dut = LazyModule(new CoalescingUnit(testConfig))
|
val dut = LazyModule(new CoalescingUnit(testConfig))
|
||||||
@@ -81,84 +101,116 @@ class DummyCoalescingUnitTBImp(outer: DummyCoalescingUnitTB) extends LazyModuleI
|
|||||||
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
// val coalMasterNode = coal.coalescerNode.makeIOs()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
object testConfig extends CoalescerConfig(
|
||||||
|
numLanes = 4,
|
||||||
|
queueDepth = 1,
|
||||||
|
waitTimeout = 8,
|
||||||
|
addressWidth = 24,
|
||||||
|
dataBusWidth = 5,
|
||||||
|
// watermark = 2,
|
||||||
|
wordSizeInBytes = 4,
|
||||||
|
wordWidth = 2,
|
||||||
|
numOldSrcIds = 16,
|
||||||
|
numNewSrcIds = 4,
|
||||||
|
respQueueDepth = 4,
|
||||||
|
coalLogSizes = Seq(3),
|
||||||
|
sizeEnum = DefaultInFlightTableSizeEnum
|
||||||
|
)
|
||||||
|
|
||||||
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class CoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
behavior of "multi- and mono-coalescers"
|
behavior of "multi- and mono-coalescers"
|
||||||
|
|
||||||
it should "coalesce fully consecutive accesses at size 4, only once" in {
|
implicit val p: Parameters = Parameters.empty
|
||||||
implicit val p: Parameters = Parameters.empty
|
|
||||||
|
|
||||||
val tb = LazyModule(new DummyCoalescingUnitTB())
|
def pokeA(
|
||||||
// val outer = LazyModule(new CoalescingUnit(testConfig))
|
nodes: Seq[TLBundle],
|
||||||
|
idx: Int,
|
||||||
val coal = tb.dut
|
op: Int,
|
||||||
|
size: Int,
|
||||||
test(tb.module).withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation)) { c =>
|
source: Int,
|
||||||
val nodes = c.coalIOs.map(_.head)
|
addr: Int,
|
||||||
// val nodes = c.cpuNodesImp.map(_.out.head._1)
|
mask: Int,
|
||||||
// val nodes = c.coal.node.in.map(_._1)
|
data: Int
|
||||||
// val nodes = c.mitmNodesImp.map(_.in.head._1)
|
): Unit = {
|
||||||
|
val node = nodes(idx)
|
||||||
def pokeA(nodes: Seq[TLBundle], idx: Int, op: Int, size: Int, source: Int, addr: Int, mask: Int, data: Int): Unit = {
|
|
||||||
val node = nodes(idx)
|
|
||||||
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
// node.a.ready.expect(true.B) // FIXME: this fails currently
|
||||||
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
node.a.bits.opcode.poke(if (op == 1) TLMessages.PutFullData else TLMessages.Get)
|
||||||
node.a.bits.param.poke(0.U)
|
node.a.bits.param.poke(0.U)
|
||||||
node.a.bits.size.poke(size.U)
|
node.a.bits.size.poke(size.U)
|
||||||
node.a.bits.source.poke(source.U)
|
node.a.bits.source.poke(source.U)
|
||||||
node.a.bits.address.poke(addr.U)
|
node.a.bits.address.poke(addr.U)
|
||||||
node.a.bits.mask.poke(mask.U)
|
node.a.bits.mask.poke(mask.U)
|
||||||
node.a.bits.data.poke(data.U)
|
node.a.bits.data.poke(data.U)
|
||||||
node.a.bits.corrupt.poke(false.B)
|
node.a.bits.corrupt.poke(false.B)
|
||||||
node.a.valid.poke(true.B)
|
node.a.valid.poke(true.B)
|
||||||
}
|
}
|
||||||
|
|
||||||
def unsetA(): Unit = {
|
def unsetA(nodes: Seq[TLBundle]): Unit = {
|
||||||
nodes.foreach { node =>
|
nodes.foreach { node =>
|
||||||
node.a.valid.poke(false.B)
|
node.a.valid.poke(false.B)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// always ready to take coalesced requests
|
// it should "coalesce fully consecutive accesses at size 4, only once" in {
|
||||||
// c.coalMasterNode.head.a.ready.poke(true.B)
|
// test(makeTb().module)
|
||||||
// c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
// .withAnnotations(Seq(VcsBackendAnnotation, WriteFsdbAnnotation))
|
||||||
|
// { c =>
|
||||||
|
// println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||||
|
// val nodes = c.coalIOs.map(_.head)
|
||||||
|
// // val nodes = c.cpuNodesImp.map(_.out.head._1)
|
||||||
|
// // val nodes = c.coal.node.in.map(_._1)
|
||||||
|
// // val nodes = c.mitmNodesImp.map(_.in.head._1)
|
||||||
|
|
||||||
pokeA(nodes, idx=0, op=1, size=2, source=0, addr=0x10, mask=0xf, data=0x1111)
|
// // always ready to take coalesced requests
|
||||||
pokeA(nodes, idx=1, op=1, size=2, source=0, addr=0x14, mask=0xf, data=0x2222)
|
// // c.coalMasterNode.head.a.ready.poke(true.B)
|
||||||
pokeA(nodes, idx=2, op=1, size=2, source=0, addr=0x18, mask=0xf, data=0x3333)
|
// // c.coal.module.coalescer.io.outReq.ready.poke(true.B)
|
||||||
pokeA(nodes, idx=3, op=1, size=2, source=0, addr=0x1c, mask=0xf, data=0x4444)
|
|
||||||
|
// pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x10, mask = 0xf, data = 0x1111)
|
||||||
|
// pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x14, mask = 0xf, data = 0x2222)
|
||||||
|
// pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||||
|
// pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x1c, mask = 0xf, data = 0x4444)
|
||||||
|
|
||||||
|
// c.clock.step()
|
||||||
|
|
||||||
|
// unsetA(nodes)
|
||||||
|
|
||||||
|
// c.clock.step()
|
||||||
|
// c.clock.step()
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
it should "coalesce identical addresses (stride of 0)" in {
|
||||||
|
test(LazyModule(new DummyCoalescingUnitTB()).module)
|
||||||
|
.withAnnotations(Seq(VcsBackendAnnotation))
|
||||||
|
{ c =>
|
||||||
|
println(s"coalIO length = ${c.coalIOs(0).length}")
|
||||||
|
val nodes = c.coalIOs.map(_.head)
|
||||||
|
|
||||||
|
pokeA(nodes, idx = 0, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x1111)
|
||||||
|
pokeA(nodes, idx = 1, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x2222)
|
||||||
|
pokeA(nodes, idx = 2, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x3333)
|
||||||
|
pokeA(nodes, idx = 3, op = 1, size = 2, source = 0, addr = 0x18, mask = 0xf, data = 0x4444)
|
||||||
|
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
|
|
||||||
unsetA()
|
unsetA(nodes)
|
||||||
|
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
it should "coalesce strided accesses at size 6" in {
|
it should "coalesce strided accesses at size 6" in {}
|
||||||
|
|
||||||
}
|
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {}
|
||||||
|
|
||||||
it should "coalesce the coalescable chunk and leave 2 uncoalescable requests" in {
|
it should "not touch uncoalescable requests" in {}
|
||||||
|
|
||||||
}
|
it should "allow temporal coalescing when depth >=2" in {}
|
||||||
|
|
||||||
it should "not touch uncoalescable requests" in {
|
it should "select the most coverage mono-coalescer" in {}
|
||||||
|
|
||||||
}
|
it should "resort to the backup policy when coverage is below average" in {}
|
||||||
|
|
||||||
it should "allow temporal coalescing when depth >=2" in {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
it should "select the most coverage mono-coalescer" in {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
it should "resort to the backup policy when coverage is below average" in {
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
@@ -167,6 +219,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
it should "work like normal shiftqueue when no invalidate" in {
|
it should "work like normal shiftqueue when no invalidate" in {
|
||||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
c.io.queue.deq.ready.poke(false.B)
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
c.io.queue.enq.ready.expect(true.B)
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
c.io.queue.enq.valid.poke(true.B)
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
@@ -215,6 +268,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
it should "work when enqueing and dequeueing simultaneously" in {
|
it should "work when enqueing and dequeueing simultaneously" in {
|
||||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.queue.deq.ready.poke(true.B)
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
@@ -243,9 +297,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
it should "work when enqueing and dequeueing simultaneously to a full queue" in {
|
it should "not shift entries when allowShift is false" in {
|
||||||
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
|
|
||||||
|
c.io.allowShift.poke(false.B)
|
||||||
|
|
||||||
|
// prepare
|
||||||
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
|
c.io.queue.enq.bits.poke(0x12.U)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
|
c.io.queue.enq.bits.poke(0x34.U)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.queue.enq.valid.poke(false.B)
|
||||||
|
|
||||||
|
// dequeueing should work normally when allowShift is false...
|
||||||
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
|
c.io.queue.deq.valid.expect(true.B)
|
||||||
|
c.io.queue.deq.bits.expect(0x12.U)
|
||||||
|
c.clock.step()
|
||||||
|
// but should stop there and not dequeue the next entry
|
||||||
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
|
c.io.queue.deq.valid.expect(false.B)
|
||||||
|
c.clock.step()
|
||||||
|
// when allowShift is back one, dequeueing should start working from next
|
||||||
|
// cycle
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
|
c.io.queue.deq.valid.expect(true.B)
|
||||||
|
c.io.queue.deq.bits.expect(0x34.U)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
it should "work when enqueing and dequeueing simultaneously to a depth=1 queue" in {
|
||||||
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.queue.deq.ready.poke(true.B)
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
@@ -282,9 +374,47 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
it should "invalidate head being dequeued" in {
|
it should "work when invalidating and enqueueing to a depth=1 queue" in {
|
||||||
|
test(new CoalShiftQueue(UInt(8.W), 1)) { c =>
|
||||||
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
// no dequeueing
|
||||||
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
|
|
||||||
|
// prepare
|
||||||
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
|
c.io.queue.enq.bits.poke(0x12.U)
|
||||||
|
c.clock.step()
|
||||||
|
// invalidate, but don't allow shift
|
||||||
|
c.io.allowShift.poke(false.B)
|
||||||
|
c.io.invalidate.valid.poke(true.B)
|
||||||
|
c.io.invalidate.bits.poke(0x1.U)
|
||||||
|
// TODO: we might be able to enqueue to a full depth=1 queue whose only
|
||||||
|
// entry just got invalidated, so that enq.ready is true here, but
|
||||||
|
// it is a niche case
|
||||||
|
c.io.queue.enq.ready.expect(false.B)
|
||||||
|
c.clock.step()
|
||||||
|
// now try enqueueing now that we have space
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
|
c.io.queue.enq.bits.poke(0x34.U)
|
||||||
|
c.io.queue.deq.valid.expect(false.B)
|
||||||
|
c.clock.step()
|
||||||
|
// see if it comes out right next cycle
|
||||||
|
c.io.queue.enq.valid.poke(false.B)
|
||||||
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
|
c.io.queue.deq.valid.expect(true.B)
|
||||||
|
c.io.queue.deq.bits.expect(0x34.U)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
it should "invalidate head that is also being dequeued" in {
|
||||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.queue.deq.ready.poke(false.B)
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
@@ -300,12 +430,11 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.queue.enq.valid.poke(false.B)
|
c.io.queue.enq.valid.poke(false.B)
|
||||||
|
|
||||||
// invalidate should work for the head just being dequeued at the same
|
// invalidate should work for the head just being dequeued at the same
|
||||||
// cycle. However, it should not change deq.valid right away to avoid
|
// cycle
|
||||||
// combinational cycles (see definition).
|
|
||||||
c.io.invalidate.valid.poke(true.B)
|
c.io.invalidate.valid.poke(true.B)
|
||||||
c.io.invalidate.bits.poke(0x1.U)
|
c.io.invalidate.bits.poke(0x1.U)
|
||||||
c.io.queue.deq.ready.poke(true.B)
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
c.io.queue.deq.valid.expect(true.B)
|
c.io.queue.deq.valid.expect(false.B)
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
// 0x12 should have been dequeued
|
// 0x12 should have been dequeued
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
@@ -315,10 +444,12 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
it should "dequeue invalidated entries by itself" in {
|
it should "dequeue invalidated head on its own when allowShift" in {
|
||||||
test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c =>
|
test(new CoalShiftQueue(gen = UInt(8.W), entries = 4)) { c =>
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
|
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.queue.deq.ready.poke(false.B)
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
c.io.queue.enq.ready.expect(true.B)
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
@@ -338,19 +469,33 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
// invalidate two entries at head
|
// invalidate two entries at head
|
||||||
c.io.invalidate.valid.poke(true.B)
|
c.io.invalidate.valid.poke(true.B)
|
||||||
c.io.invalidate.bits.poke(0x3.U)
|
c.io.invalidate.bits.poke(0x3.U)
|
||||||
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
// [ 0x56 | 0x34(inv) | 0x12(inv) ]
|
// [ 0x56 | 0x34(inv) | 0x12(inv) ]
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
// [ 0x56 | 0x34(inv) ]
|
// [ 0x56 | 0x34(inv) ]
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
c.io.queue.deq.ready.poke(false.B)
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
// [ 0x56 ]
|
// [ 0x56 ]
|
||||||
c.io.queue.deq.ready.poke(true.B)
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
c.io.queue.deq.valid.expect(true.B)
|
c.io.queue.deq.valid.expect(true.B)
|
||||||
c.io.queue.deq.bits.expect(0x56.U)
|
c.io.queue.deq.bits.expect(0x56.U)
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
c.io.queue.deq.ready.poke(true.B)
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
c.io.queue.deq.valid.expect(false.B)
|
c.io.queue.deq.valid.expect(false.B)
|
||||||
|
c.clock.step()
|
||||||
|
|
||||||
|
// do one more enqueue-then-dequeue to see if used bit was properly cleared
|
||||||
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
|
c.io.queue.enq.ready.expect(true.B)
|
||||||
|
c.io.queue.enq.valid.poke(true.B)
|
||||||
|
c.io.queue.enq.bits.poke(0x78.U)
|
||||||
|
c.clock.step()
|
||||||
|
// should dequeue right away
|
||||||
|
c.io.queue.enq.valid.poke(false.B)
|
||||||
|
c.io.queue.deq.ready.poke(true.B)
|
||||||
|
c.io.queue.deq.valid.expect(true.B)
|
||||||
|
c.io.queue.deq.bits.expect(0x78.U)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -358,6 +503,7 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
test(new CoalShiftQueue(UInt(8.W), 4)) { c =>
|
||||||
c.io.invalidate.valid.poke(false.B)
|
c.io.invalidate.valid.poke(false.B)
|
||||||
c.io.invalidate.bits.poke(0.U)
|
c.io.invalidate.bits.poke(0.U)
|
||||||
|
c.io.allowShift.poke(true.B)
|
||||||
|
|
||||||
// prepare
|
// prepare
|
||||||
c.io.queue.deq.ready.poke(false.B)
|
c.io.queue.deq.ready.poke(false.B)
|
||||||
@@ -383,24 +529,23 @@ class CoalShiftQueueTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
object testConfig extends CoalescerConfig(
|
object uncoalescerTestConfig extends CoalescerConfig(
|
||||||
maxSize = 5,
|
numLanes = 4,
|
||||||
queueDepth = 2,
|
queueDepth = 2,
|
||||||
waitTimeout = 8,
|
waitTimeout = 8,
|
||||||
addressWidth = 24,
|
addressWidth = 24,
|
||||||
dataBusWidth = 5,
|
dataBusWidth = 5,
|
||||||
numLanes = 4,
|
|
||||||
// watermark = 2,
|
// watermark = 2,
|
||||||
wordSizeInBytes = 4,
|
wordSizeInBytes = 4,
|
||||||
wordWidth = 2,
|
wordWidth = 2,
|
||||||
numOldSrcIds = 16,
|
numOldSrcIds = 16,
|
||||||
numNewSrcIds = 4,
|
numNewSrcIds = 4,
|
||||||
respQueueDepth = 4,
|
respQueueDepth = 4,
|
||||||
coalSizes = Seq(4, 5),
|
coalLogSizes = Seq(4),
|
||||||
sizeEnum = DefaultInFlightTableSizeEnum
|
sizeEnum = DefaultInFlightTableSizeEnum
|
||||||
)
|
)
|
||||||
|
|
||||||
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class UncoalescerUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
behavior of "uncoalescer"
|
behavior of "uncoalescer"
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
val numPerLaneReqs = 2
|
val numPerLaneReqs = 2
|
||||||
@@ -410,8 +555,8 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
val coalDataWidth = 128
|
val coalDataWidth = 128
|
||||||
val numInflightCoalRequests = 4
|
val numInflightCoalRequests = 4
|
||||||
|
|
||||||
it should "work" in {
|
it should "work in general case" in {
|
||||||
test(new UncoalescingUnit(testConfig))
|
test(new Uncoalescer(uncoalescerTestConfig))
|
||||||
// vcs helps with simulation time, but sometimes errors with
|
// vcs helps with simulation time, but sometimes errors with
|
||||||
// "mutation occurred during iteration" java error
|
// "mutation occurred during iteration" java error
|
||||||
// .withAnnotations(Seq(VcsBackendAnnotation))
|
// .withAnnotations(Seq(VcsBackendAnnotation))
|
||||||
@@ -426,7 +571,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
|
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
|
||||||
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
||||||
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
|
c.io.newEntry.lanes(0).reqs(1).source.poke(2.U)
|
||||||
c.io.newEntry.lanes(0).reqs(1).offset.poke(0.U)
|
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U) // same offset to different lanes
|
||||||
c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four)
|
c.io.newEntry.lanes(0).reqs(1).sizeEnum.poke(four)
|
||||||
c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B)
|
c.io.newEntry.lanes(1).reqs(0).valid.poke(false.B)
|
||||||
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||||
@@ -460,7 +605,7 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
// offset is counting from LSB
|
// offset is counting from LSB
|
||||||
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
|
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
|
c.io.uncoalResps(0)(0).bits.source.expect(1.U)
|
||||||
c.io.uncoalResps(0)(1).bits.data.expect(0xdeadbeefL.U)
|
c.io.uncoalResps(0)(1).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
|
c.io.uncoalResps(0)(1).bits.source.expect(2.U)
|
||||||
c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U)
|
c.io.uncoalResps(2)(0).bits.data.expect(0x89abcdefL.U)
|
||||||
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
|
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
|
||||||
@@ -468,6 +613,67 @@ class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
|
c.io.uncoalResps(2)(1).bits.source.expect(2.U)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
it should "uncoalesce when coalesced to the same word offset" in {
|
||||||
|
test(new Uncoalescer(uncoalescerTestConfig))
|
||||||
|
// .withAnnotations(Seq(VcsBackendAnnotation))
|
||||||
|
{ c =>
|
||||||
|
val sourceId = 0.U
|
||||||
|
val four = c.io.newEntry.sizeEnumT.FOUR
|
||||||
|
c.io.coalReqValid.poke(true.B)
|
||||||
|
c.io.newEntry.source.poke(sourceId)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).source.poke(0.U)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).sizeEnum.poke(four)
|
||||||
|
c.io.newEntry.lanes(0).reqs(1).valid.poke(false.B)
|
||||||
|
c.io.newEntry.lanes(1).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(1).reqs(0).source.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(1).reqs(0).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(1).reqs(0).sizeEnum.poke(four)
|
||||||
|
c.io.newEntry.lanes(1).reqs(1).valid.poke(false.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).source.poke(2.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).sizeEnum.poke(four)
|
||||||
|
c.io.newEntry.lanes(2).reqs(1).valid.poke(false.B)
|
||||||
|
c.io.newEntry.lanes(3).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(3).reqs(0).source.poke(3.U)
|
||||||
|
c.io.newEntry.lanes(3).reqs(0).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(3).reqs(0).sizeEnum.poke(four)
|
||||||
|
c.io.newEntry.lanes(3).reqs(1).valid.poke(false.B)
|
||||||
|
|
||||||
|
c.clock.step()
|
||||||
|
|
||||||
|
c.io.coalReqValid.poke(false.B)
|
||||||
|
|
||||||
|
c.clock.step()
|
||||||
|
|
||||||
|
c.io.coalResp.valid.poke(true.B)
|
||||||
|
c.io.coalResp.bits.source.poke(sourceId)
|
||||||
|
val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL)
|
||||||
|
c.io.coalResp.bits.data.poke(lit.U)
|
||||||
|
|
||||||
|
// table lookup is combinational at the same cycle
|
||||||
|
// offset is counting from LSB
|
||||||
|
c.io.uncoalResps(0)(0).valid.expect(true.B)
|
||||||
|
c.io.uncoalResps(0)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
|
c.io.uncoalResps(0)(0).bits.source.expect(0.U)
|
||||||
|
c.io.uncoalResps(0)(1).valid.expect(false.B)
|
||||||
|
c.io.uncoalResps(1)(0).valid.expect(true.B)
|
||||||
|
c.io.uncoalResps(1)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
|
c.io.uncoalResps(1)(0).bits.source.expect(1.U)
|
||||||
|
c.io.uncoalResps(1)(1).valid.expect(false.B)
|
||||||
|
c.io.uncoalResps(2)(0).valid.expect(true.B)
|
||||||
|
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
|
c.io.uncoalResps(2)(0).bits.source.expect(2.U)
|
||||||
|
c.io.uncoalResps(2)(1).valid.expect(false.B)
|
||||||
|
c.io.uncoalResps(3)(0).valid.expect(true.B)
|
||||||
|
c.io.uncoalResps(3)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
|
c.io.uncoalResps(3)(0).bits.source.expect(3.U)
|
||||||
|
c.io.uncoalResps(3)(1).valid.expect(false.B)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
|
|||||||
Reference in New Issue
Block a user