From 1886aefcc1c92b1f2735bdd4853e759a7bd60ef1 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 9 May 2023 22:22:27 -0700 Subject: [PATCH 1/4] Parameterize tracefile has_source from Config --- src/main/resources/csrc/SimMemTrace.cc | 4 ++-- src/main/resources/csrc/SimMemTrace.h | 2 +- src/main/resources/vsrc/SimMemTrace.v | 9 ++++--- src/main/scala/tilelink/Coalescing.scala | 24 +++++++++++-------- src/main/scala/tilelink/TracerSystemMem.scala | 24 +++++++------------ 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/main/resources/csrc/SimMemTrace.cc b/src/main/resources/csrc/SimMemTrace.cc index 6c08858..70328b8 100644 --- a/src/main/resources/csrc/SimMemTrace.cc +++ b/src/main/resources/csrc/SimMemTrace.cc @@ -152,7 +152,7 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id, assert(!"unreachable"); } -extern "C" void memtrace_init(const char *filename) { +extern "C" void memtrace_init(const char *filename, bool has_source) { #ifndef NO_VPI s_vpi_vlog_info info; if (!vpi_get_vlog_info(&info)) { @@ -175,7 +175,7 @@ extern "C" void memtrace_init(const char *filename) { reader = std::make_unique(filename); // parse file upfront // driver trace file is assumed to not have source id - reader->parse(false); + reader->parse(has_source); } // TODO: accept core_id as well diff --git a/src/main/resources/csrc/SimMemTrace.h b/src/main/resources/csrc/SimMemTrace.h index 753385e..0f9126c 100644 --- a/src/main/resources/csrc/SimMemTrace.h +++ b/src/main/resources/csrc/SimMemTrace.h @@ -44,7 +44,7 @@ public: FILE *outfile; }; -extern "C" void memtrace_init(const char *filename); +extern "C" void memtrace_init(const char *filename, bool has_source); extern "C" void memtrace_query(unsigned char trace_read_ready, unsigned long trace_read_cycle, int trace_read_lane_id, diff --git a/src/main/resources/vsrc/SimMemTrace.v b/src/main/resources/vsrc/SimMemTrace.v index 74594cb..37280b1 100644 --- a/src/main/resources/vsrc/SimMemTrace.v +++ b/src/main/resources/vsrc/SimMemTrace.v @@ -4,7 +4,8 @@ `define LOGSIZE_WIDTH 8 import "DPI-C" function void memtrace_init( - input string filename + input string filename, + input bit has_source ); // Make sure to sync the parameters for: @@ -24,7 +25,9 @@ import "DPI-C" function void memtrace_query output bit trace_read_finished ); -module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( +module SimMemTrace #(parameter FILENAME = "undefined", + NUM_LANES = 4, + HAS_SOURCE = 0) ( input clock, input reset, @@ -61,7 +64,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( initial begin /* $value$plusargs("uartlog=%s", __uartlog); */ - memtrace_init(FILENAME); + memtrace_init(FILENAME, HAS_SOURCE); end always @(posedge clock) begin diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 86fd3c0..e1d32c4 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -13,7 +13,7 @@ import freechips.rocketchip.unittest._ // TODO: find better place for these case class SIMTCoreParams(nLanes: Int = 4) -case class MemtraceCoreParams(tracefilename: String = "undefined") +case class MemtraceCoreParams(tracefilename: String = "undefined", traceHasSource: Boolean = false) case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/) case object MemtraceCoreKey extends Field[Option[MemtraceCoreParams]](None /*default*/) @@ -1062,9 +1062,11 @@ object TLUtils { } } -class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit - p: Parameters -) extends LazyModule { +// `traceHasSource` is true if the input trace file has an additional source +// ID column. This is useful for using the output trace file genereated by +// MemTraceLogger as the driver. +class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false) + (implicit p: Parameters) extends LazyModule { // Create N client nodes together val laneNodes = Seq.tabulate(config.numLanes) { i => val clientParam = Seq( @@ -1082,7 +1084,7 @@ class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit val node = TLIdentityNode() laneNodes.foreach { l => node := l } - lazy val module = new MemTraceDriverImp(this, config, filename) + lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource) } trait HasTraceLine { @@ -1105,7 +1107,8 @@ class TraceLine extends Bundle with HasTraceLine { val data = UInt(64.W) } -class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String) +class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String, + traceHasSource: Boolean) extends LazyModuleImp(outer) with UnitTestModule { // Current cycle mark to read from trace @@ -1119,7 +1122,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename // Are we safe to read the next warp? val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _) - val sim = Module(new SimMemTrace(filename, config.numLanes)) + val sim = Module(new SimMemTrace(filename, config.numLanes, traceHasSource)) sim.io.clock := clock sim.io.reset := reset.asBool // 'sim.io.trace_ready.ready' is a ready signal going into the DPI sim, @@ -1251,10 +1254,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename } } - -class SimMemTrace(filename: String, numLanes: Int) +class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean) extends BlackBox( - Map("FILENAME" -> filename, "NUM_LANES" -> numLanes) + Map("FILENAME" -> filename, + "NUM_LANES" -> numLanes, + "HAS_SOURCE" -> (if (traceHasSource) 1 else 0)) ) with HasBlackBoxResource { val traceLineT = new TraceLine diff --git a/src/main/scala/tilelink/TracerSystemMem.scala b/src/main/scala/tilelink/TracerSystemMem.scala index 75c25a4..ab303f3 100644 --- a/src/main/scala/tilelink/TracerSystemMem.scala +++ b/src/main/scala/tilelink/TracerSystemMem.scala @@ -1,8 +1,8 @@ package freechips.rocketchip.tilelink -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.subsystem.{BaseSubsystem} -import org.chipsalliance.cde.config.{Parameters, Config} +import freechips.rocketchip.diplomacy.LazyModule +import freechips.rocketchip.subsystem.BaseSubsystem +import org.chipsalliance.cde.config.Parameters // The trait is attached to DigitalTop of Chipyard system, informing it indeed // has the ability to attach GPU tracer node onto the system bus @@ -13,20 +13,14 @@ trait CanHaveMemtraceCore { this: BaseSubsystem => // Safe to use get as WithMemtraceCore requires WithNLanes to be defined val simtParam = p(SIMTCoreKey).get val config = defaultConfig.copy(numLanes = simtParam.nLanes) - val tracer = LazyModule(new MemTraceDriver(config, param.tracefilename)(p)) + val tracer = LazyModule( + new MemTraceDriver(config, param.tracefilename, param.traceHasSource)(p) + ) // Must use :=* to ensure the N edges from Tracer doesn't get merged into 1 // when connecting to SBus - println(s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]") + println( + s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]" + ) sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node } } - -//This is used by Chip Level Config, the config which creates the SoC -class WithMemtraceCore(tracefilename: String) - extends Config((site, _, _) => { case MemtraceCoreKey => - require( - site(SIMTCoreKey).isDefined, - "Memtrace core requires a SIMT configuration. Use WithNLanes to enable SIMT." - ) - Some(MemtraceCoreParams(tracefilename)) - }) From 6032d79eadaefd84bc8c913dc3e1b12c7f80daec Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 9 May 2023 23:46:11 -0700 Subject: [PATCH 2/4] Implement proper source gen --- src/main/scala/tilelink/Coalescing.scala | 45 ++++++++++++++++-------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index e1d32c4..c7b465c 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -187,19 +187,33 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, maxSize: Int) extends Bun } } -class ReqSourceGen(sourceWidth: Int) extends Module { +// If `ignoreInUse`, just keep giving out new IDs without checking if it is in +// use. +class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module { val io = IO(new Bundle { val gen = Input(Bool()) + val reclaim = Input(Valid(UInt(sourceWidth.W))) val id = Output(Valid(UInt(sourceWidth.W))) }) val head = RegInit(UInt(sourceWidth.W), 0.U) - head := Mux(io.gen, head + 1.U, head) - // FIXME: keep track of ones in use & set invalid when out - io.id.valid := true.B + val numSourceId = 1 << sourceWidth + // true: in use, false: available + val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W))) + when(reset.asBool) { + (0 until numSourceId).foreach { i => occupancyTable(i).valid := false.B } + } + + io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid) io.id.bits := head + when (io.gen && io.id.valid /* fire */) { + occupancyTable(io.id.bits).valid := true.B // mark in use + } + when (io.reclaim.valid) { + occupancyTable(io.reclaim.bits).valid := false.B // mark freed + } } class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module { @@ -545,7 +559,7 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE }) } - val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds))) + val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))) sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created val coalesceValid = chosenValid && sourceGen.io.id.valid @@ -1158,12 +1172,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename reqQ.io.enq.bits := req // FIXME duplicate valid } - // To prevent collision of sourceId with a current in-flight message, - // just use a counter that increments indefinitely as the sourceId of new - // messages. - val sourceIdCounter = RegInit(0.U(64.W)) - sourceIdCounter := sourceIdCounter + 1.U - // Issue here is that Vortex mem range is not within Chipyard Mem range // In default setting, all mem-req for program data must be within // 0X80000000 -> 0X90000000 @@ -1196,22 +1204,27 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedSize = Mux(subword, 2.U, req.size) + val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds), + ignoreInUse = false)) + sourceGen.io.gen := reqQ.io.deq.fire + // assert(sourceGen.io.id.valid) + val (plegal, pbits) = edge.Put( - fromSource = sourceIdCounter, + fromSource = sourceGen.io.id.bits, toAddress = hashToValidPhyAddr(wordAlignedAddress), lgSize = wordAlignedSize, // trace line already holds log2(size) // data should be aligned to beatBytes data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt ) val (glegal, gbits) = edge.Get( - fromSource = sourceIdCounter, + fromSource = sourceGen.io.id.bits, toAddress = hashToValidPhyAddr(wordAlignedAddress), lgSize = wordAlignedSize ) val legal = Mux(req.is_store, plegal, glegal) val bits = Mux(req.is_store, pbits, gbits) - tlOut.a.valid := reqQ.io.deq.valid + tlOut.a.valid := (reqQ.io.deq.valid && sourceGen.io.id.valid) when (tlOut.a.valid) { assert(legal, "illegal TL req gen") } @@ -1221,6 +1234,10 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename tlOut.d.ready := true.B tlOut.e.valid := false.B + // Reclaim source id on response + sourceGen.io.reclaim.valid := tlOut.d.valid + sourceGen.io.reclaim.bits := tlOut.d.bits.source + // debug when(tlOut.a.valid) { TLPrintf( From 19d378dc3af01382369091d61e650f461d417636 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 10 May 2023 00:13:04 -0700 Subject: [PATCH 3/4] Fix sourceGen unasserted firrtl error --- src/main/scala/tilelink/Coalescing.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index c7b465c..218d36b 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -81,7 +81,7 @@ object defaultConfig extends CoalescerConfig( wordSizeInBytes = 4, wordWidth = 2, // when attaching to SoC, 16 source IDs are not enough due to longer latency - numOldSrcIds = 64, + numOldSrcIds = 16, numNewSrcIds = 4, respQueueDepth = 4, coalLogSizes = Seq(3), @@ -561,6 +561,8 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds))) sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created + sourceGen.io.reclaim.valid := false.B // not used + sourceGen.io.reclaim.bits := DontCare // not used val coalesceValid = chosenValid && sourceGen.io.id.valid From b48ab70e67e956b1d18fbba4b1f14231c09015e4 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 10 May 2023 00:26:25 -0700 Subject: [PATCH 4/4] Fix assertion falsely firing on invalid --- src/main/scala/tilelink/Coalescing.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala index 218d36b..4de4a99 100644 --- a/src/main/scala/tilelink/Coalescing.scala +++ b/src/main/scala/tilelink/Coalescing.scala @@ -1429,11 +1429,13 @@ class MemTraceLogger( // This assert only holds true for PutFullData and not PutPartialData, // where HIGH bits in the mask may not be contiguous. - assert( - PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), - "mask HIGH popcount do not match the TL size. " + - "Partial masks are not allowed for PutFull" - ) + when (tlIn.a.valid) { + assert( + PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), + "mask HIGH popcount do not match the TL size. " + + "Partial masks are not allowed for PutFull" + ) + } val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) val dataW = tlIn.params.dataBits val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U))