Fix backpressure handling in MemTraceDriver when attached to SoC
This commit is contained in:
@@ -80,7 +80,8 @@ object defaultConfig extends CoalescerConfig(
|
|||||||
// watermark = 2,
|
// watermark = 2,
|
||||||
wordSizeInBytes = 4,
|
wordSizeInBytes = 4,
|
||||||
wordWidth = 2,
|
wordWidth = 2,
|
||||||
numOldSrcIds = 16,
|
// when attaching to SoC, 16 source IDs are not enough due to longer latency
|
||||||
|
numOldSrcIds = 64,
|
||||||
numNewSrcIds = 4,
|
numNewSrcIds = 4,
|
||||||
respQueueDepth = 4,
|
respQueueDepth = 4,
|
||||||
coalLogSizes = Seq(3),
|
coalLogSizes = Seq(3),
|
||||||
@@ -1104,19 +1105,26 @@ class TraceLine extends Bundle with HasTraceLine {
|
|||||||
val data = UInt(64.W)
|
val data = UInt(64.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFile: String)
|
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String)
|
||||||
extends LazyModuleImp(outer)
|
extends LazyModuleImp(outer)
|
||||||
with UnitTestModule {
|
with UnitTestModule {
|
||||||
// Current cycle mark to read from trace
|
// Current cycle mark to read from trace
|
||||||
val traceReadCycle = RegInit(1.U(64.W))
|
val traceReadCycle = RegInit(1.U(64.W))
|
||||||
|
|
||||||
// If any of the downstream lane is not ready, hold on from advancing
|
// A decoupling queue to handle backpressure from downstream. We let the
|
||||||
val downstreamReady = outer.laneNodes.map(_.out(0)._1.a.ready).reduce(_ && _)
|
// downstream take requests from the queue individually for each lane,
|
||||||
|
// but do a synchronized enqueue whenever all lane queues are ready, to prevent
|
||||||
|
// drift between the lanes.
|
||||||
|
val reqQueues = Seq.fill(config.numLanes)(Module(new Queue(new TraceLine, 2)))
|
||||||
|
// Are we safe to read the next warp?
|
||||||
|
val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _)
|
||||||
|
|
||||||
val sim = Module(new SimMemTrace(traceFile, config.numLanes))
|
val sim = Module(new SimMemTrace(filename, config.numLanes))
|
||||||
sim.io.clock := clock
|
sim.io.clock := clock
|
||||||
sim.io.reset := reset.asBool
|
sim.io.reset := reset.asBool
|
||||||
sim.io.trace_read.ready := downstreamReady
|
// 'sim.io.trace_read.ready' is a ready signal going into the DPI sim,
|
||||||
|
// indicating this Chisel module is ready to read the next line.
|
||||||
|
sim.io.trace_read.ready := reqQueueAllReady
|
||||||
sim.io.trace_read.cycle := traceReadCycle
|
sim.io.trace_read.cycle := traceReadCycle
|
||||||
|
|
||||||
// Read output from Verilog BlackBox
|
// Read output from Verilog BlackBox
|
||||||
@@ -1134,23 +1142,19 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i)
|
req.data := sim.io.trace_read.data(dataW * (i + 1) - 1, dataW * i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// def missedLine = {
|
// Gated on all-ready rather than all-fire, because the trace cycle has to advance even when there is no valid
|
||||||
// val existsValidLine = WireInit(false.B)
|
// line in the trace.
|
||||||
// existsValidLine := laneReqs.map(_.valid).reduce(_||_)
|
when (reqQueueAllReady){
|
||||||
// val missedLine = WireInit(false.B)
|
|
||||||
// missedLine := !downstreamReady && existsValidLine
|
|
||||||
|
|
||||||
// // Debug
|
|
||||||
// dontTouch(downstreamReady)
|
|
||||||
// dontTouch(existsValidLine)
|
|
||||||
// dontTouch(missedLine)
|
|
||||||
|
|
||||||
// missedLine
|
|
||||||
// }
|
|
||||||
when (downstreamReady){
|
|
||||||
traceReadCycle := traceReadCycle + 1.U
|
traceReadCycle := traceReadCycle + 1.U
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enqueue traces to the request queue
|
||||||
|
(reqQueues zip laneReqs).foreach { case (reqQ, req) =>
|
||||||
|
// Synchronized enqueue
|
||||||
|
reqQ.io.enq.valid := reqQueueAllReady && req.valid
|
||||||
|
reqQ.io.enq.bits := req // FIXME duplicate valid
|
||||||
|
}
|
||||||
|
|
||||||
// To prevent collision of sourceId with a current in-flight message,
|
// To prevent collision of sourceId with a current in-flight message,
|
||||||
// just use a counter that increments indefinitely as the sourceId of new
|
// just use a counter that increments indefinitely as the sourceId of new
|
||||||
// messages.
|
// messages.
|
||||||
@@ -1164,8 +1168,14 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
Cat(8.U(4.W), addr(27, 0))
|
Cat(8.U(4.W), addr(27, 0))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate TL requests corresponding to the trace lines
|
// Take requests off of the queue and generate TL requests
|
||||||
(outer.laneNodes zip laneReqs).foreach { case (node, req) =>
|
(outer.laneNodes zip reqQueues).foreach { case (node, reqQ) =>
|
||||||
|
val (tlOut, edge) = node.out(0)
|
||||||
|
|
||||||
|
val req = reqQ.io.deq.bits
|
||||||
|
// backpressure from downstream propagates into the queue
|
||||||
|
reqQ.io.deq.ready := tlOut.a.ready
|
||||||
|
|
||||||
// Core only makes accesses of granularity larger than a word, so we want
|
// Core only makes accesses of granularity larger than a word, so we want
|
||||||
// the trace driver to act so as well.
|
// the trace driver to act so as well.
|
||||||
// That means if req.size is smaller than word size, we need to pad data
|
// That means if req.size is smaller than word size, we need to pad data
|
||||||
@@ -1183,7 +1193,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
|
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
|
||||||
val wordAlignedSize = Mux(subword, 2.U, req.size)
|
val wordAlignedSize = Mux(subword, 2.U, req.size)
|
||||||
|
|
||||||
val (tlOut, edge) = node.out(0)
|
|
||||||
val (plegal, pbits) = edge.Put(
|
val (plegal, pbits) = edge.Put(
|
||||||
fromSource = sourceIdCounter,
|
fromSource = sourceIdCounter,
|
||||||
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
||||||
@@ -1199,9 +1208,21 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
val legal = Mux(req.is_store, plegal, glegal)
|
val legal = Mux(req.is_store, plegal, glegal)
|
||||||
val bits = Mux(req.is_store, pbits, gbits)
|
val bits = Mux(req.is_store, pbits, gbits)
|
||||||
|
|
||||||
|
tlOut.a.valid := reqQ.io.deq.valid
|
||||||
|
when (tlOut.a.valid) {
|
||||||
|
assert(legal, "illegal TL req gen")
|
||||||
|
}
|
||||||
|
tlOut.a.bits := bits
|
||||||
|
tlOut.b.ready := true.B
|
||||||
|
tlOut.c.valid := false.B
|
||||||
|
tlOut.d.ready := true.B
|
||||||
|
tlOut.e.valid := false.B
|
||||||
|
|
||||||
|
// debug
|
||||||
when(tlOut.a.valid) {
|
when(tlOut.a.valid) {
|
||||||
TLPrintf(
|
TLPrintf(
|
||||||
"MemTraceDriver",
|
"MemTraceDriver",
|
||||||
|
tlOut.a.bits.source,
|
||||||
tlOut.a.bits.address,
|
tlOut.a.bits.address,
|
||||||
tlOut.a.bits.size,
|
tlOut.a.bits.size,
|
||||||
tlOut.a.bits.mask,
|
tlOut.a.bits.mask,
|
||||||
@@ -1210,17 +1231,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
req.data
|
req.data
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(legal, "illegal TL req gen")
|
|
||||||
tlOut.a.valid := req.valid
|
|
||||||
tlOut.a.bits := bits
|
|
||||||
tlOut.b.ready := true.B
|
|
||||||
tlOut.c.valid := false.B
|
|
||||||
tlOut.d.ready := true.B
|
|
||||||
tlOut.e.valid := false.B
|
|
||||||
|
|
||||||
println(s"======= MemTraceDriver: TL data width: ${tlOut.params.dataBits}")
|
|
||||||
|
|
||||||
dontTouch(tlOut.a)
|
dontTouch(tlOut.a)
|
||||||
dontTouch(tlOut.d)
|
dontTouch(tlOut.d)
|
||||||
}
|
}
|
||||||
@@ -1232,12 +1242,13 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, traceFil
|
|||||||
finishCounter := finishCounter - 1.U
|
finishCounter := finishCounter - 1.U
|
||||||
}
|
}
|
||||||
io.finished := (finishCounter === 0.U)
|
io.finished := (finishCounter === 0.U)
|
||||||
// when(io.finished) {
|
|
||||||
// assert(
|
when(io.finished) {
|
||||||
// false.B,
|
assert(
|
||||||
// "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
|
false.B,
|
||||||
// )
|
"\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
|
||||||
// }
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1376,6 +1387,7 @@ class MemTraceLogger(
|
|||||||
when(req.valid) {
|
when(req.valid) {
|
||||||
TLPrintf(
|
TLPrintf(
|
||||||
s"MemTraceLogger (${loggerName}:downstream)",
|
s"MemTraceLogger (${loggerName}:downstream)",
|
||||||
|
tlIn.a.bits.source,
|
||||||
tlIn.a.bits.address,
|
tlIn.a.bits.address,
|
||||||
tlIn.a.bits.size,
|
tlIn.a.bits.size,
|
||||||
tlIn.a.bits.mask,
|
tlIn.a.bits.mask,
|
||||||
@@ -1533,6 +1545,7 @@ class TLPrintf {}
|
|||||||
object TLPrintf {
|
object TLPrintf {
|
||||||
def apply(
|
def apply(
|
||||||
printer: String,
|
printer: String,
|
||||||
|
source: UInt,
|
||||||
address: UInt,
|
address: UInt,
|
||||||
size: UInt,
|
size: UInt,
|
||||||
mask: UInt,
|
mask: UInt,
|
||||||
@@ -1540,7 +1553,8 @@ object TLPrintf {
|
|||||||
tlData: UInt,
|
tlData: UInt,
|
||||||
reqData: UInt
|
reqData: UInt
|
||||||
) = {
|
) = {
|
||||||
printf(s"${printer}: TL addr=%x, size=%d, mask=%x, store=%d", address, size, mask, is_store)
|
printf(s"${printer}: TL source=%d, addr=%x, size=%d, mask=%x, store=%d",
|
||||||
|
source, address, size, mask, is_store)
|
||||||
when(is_store) {
|
when(is_store) {
|
||||||
printf(", tlData=%x, reqData=%x", tlData, reqData)
|
printf(", tlData=%x, reqData=%x", tlData, reqData)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user