Merge remote-tracking branch 'origin/graphics' into local-dev-branch

This commit is contained in:
Vamber Yang
2023-05-10 19:02:16 -07:00
5 changed files with 73 additions and 51 deletions

View File

@@ -152,7 +152,7 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id,
assert(!"unreachable"); assert(!"unreachable");
} }
extern "C" void memtrace_init(const char *filename) { extern "C" void memtrace_init(const char *filename, bool has_source) {
#ifndef NO_VPI #ifndef NO_VPI
s_vpi_vlog_info info; s_vpi_vlog_info info;
if (!vpi_get_vlog_info(&info)) { if (!vpi_get_vlog_info(&info)) {
@@ -175,7 +175,7 @@ extern "C" void memtrace_init(const char *filename) {
reader = std::make_unique<MemTraceReader>(filename); reader = std::make_unique<MemTraceReader>(filename);
// parse file upfront // parse file upfront
// driver trace file is assumed to not have source id // driver trace file is assumed to not have source id
reader->parse(false); reader->parse(has_source);
} }
// TODO: accept core_id as well // TODO: accept core_id as well

View File

@@ -44,7 +44,7 @@ public:
FILE *outfile; FILE *outfile;
}; };
extern "C" void memtrace_init(const char *filename); extern "C" void memtrace_init(const char *filename, bool has_source);
extern "C" void memtrace_query(unsigned char trace_read_ready, extern "C" void memtrace_query(unsigned char trace_read_ready,
unsigned long trace_read_cycle, unsigned long trace_read_cycle,
int trace_read_lane_id, int trace_read_lane_id,

View File

@@ -4,7 +4,8 @@
`define LOGSIZE_WIDTH 8 `define LOGSIZE_WIDTH 8
import "DPI-C" function void memtrace_init( import "DPI-C" function void memtrace_init(
input string filename input string filename,
input bit has_source
); );
// Make sure to sync the parameters for: // Make sure to sync the parameters for:
@@ -24,7 +25,9 @@ import "DPI-C" function void memtrace_query
output bit trace_read_finished output bit trace_read_finished
); );
module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) ( module SimMemTrace #(parameter FILENAME = "undefined",
NUM_LANES = 4,
HAS_SOURCE = 0) (
input clock, input clock,
input reset, input reset,
@@ -61,7 +64,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
initial begin initial begin
/* $value$plusargs("uartlog=%s", __uartlog); */ /* $value$plusargs("uartlog=%s", __uartlog); */
memtrace_init(FILENAME); memtrace_init(FILENAME, HAS_SOURCE);
end end
always @(posedge clock) begin always @(posedge clock) begin

View File

@@ -13,7 +13,7 @@ import freechips.rocketchip.unittest._
// TODO: find better place for these // TODO: find better place for these
case class SIMTCoreParams(nLanes: Int = 4) case class SIMTCoreParams(nLanes: Int = 4)
case class MemtraceCoreParams(tracefilename: String = "undefined") case class MemtraceCoreParams(tracefilename: String = "undefined", traceHasSource: Boolean = false)
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/) case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/)
case object MemtraceCoreKey extends Field[Option[MemtraceCoreParams]](None /*default*/) case object MemtraceCoreKey extends Field[Option[MemtraceCoreParams]](None /*default*/)
@@ -81,7 +81,7 @@ object defaultConfig extends CoalescerConfig(
wordSizeInBytes = 4, wordSizeInBytes = 4,
wordWidth = 2, wordWidth = 2,
// when attaching to SoC, 16 source IDs are not enough due to longer latency // when attaching to SoC, 16 source IDs are not enough due to longer latency
numOldSrcIds = 64, numOldSrcIds = 16,
numNewSrcIds = 4, numNewSrcIds = 4,
respQueueDepth = 4, respQueueDepth = 4,
coalLogSizes = Seq(3), coalLogSizes = Seq(3),
@@ -187,19 +187,33 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, maxSize: Int) extends Bun
} }
} }
class ReqSourceGen(sourceWidth: Int) extends Module { // If `ignoreInUse`, just keep giving out new IDs without checking if it is in
// use.
class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val gen = Input(Bool()) val gen = Input(Bool())
val reclaim = Input(Valid(UInt(sourceWidth.W)))
val id = Output(Valid(UInt(sourceWidth.W))) val id = Output(Valid(UInt(sourceWidth.W)))
}) })
val head = RegInit(UInt(sourceWidth.W), 0.U) val head = RegInit(UInt(sourceWidth.W), 0.U)
head := Mux(io.gen, head + 1.U, head) head := Mux(io.gen, head + 1.U, head)
// FIXME: keep track of ones in use & set invalid when out val numSourceId = 1 << sourceWidth
io.id.valid := true.B // true: in use, false: available
val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W)))
when(reset.asBool) {
(0 until numSourceId).foreach { i => occupancyTable(i).valid := false.B }
}
io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid)
io.id.bits := head io.id.bits := head
when (io.gen && io.id.valid /* fire */) {
occupancyTable(io.id.bits).valid := true.B // mark in use
}
when (io.reclaim.valid) {
occupancyTable(io.reclaim.bits).valid := false.B // mark freed
}
} }
class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module { class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module {
@@ -545,8 +559,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
}) })
} }
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds))) val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds)))
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
sourceGen.io.reclaim.valid := false.B // not used
sourceGen.io.reclaim.bits := DontCare // not used
val coalesceValid = chosenValid && sourceGen.io.id.valid val coalesceValid = chosenValid && sourceGen.io.id.valid
@@ -1062,9 +1078,11 @@ object TLUtils {
} }
} }
class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit // `traceHasSource` is true if the input trace file has an additional source
p: Parameters // ID column. This is useful for using the output trace file generated by
) extends LazyModule { // MemTraceLogger as the driver.
class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false)
(implicit p: Parameters) extends LazyModule {
// Create N client nodes together // Create N client nodes together
val laneNodes = Seq.tabulate(config.numLanes) { i => val laneNodes = Seq.tabulate(config.numLanes) { i =>
val clientParam = Seq( val clientParam = Seq(
@@ -1082,7 +1100,7 @@ class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit
val node = TLIdentityNode() val node = TLIdentityNode()
laneNodes.foreach { l => node := l } laneNodes.foreach { l => node := l }
lazy val module = new MemTraceDriverImp(this, config, filename) lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource)
} }
trait HasTraceLine { trait HasTraceLine {
@@ -1105,7 +1123,8 @@ class TraceLine extends Bundle with HasTraceLine {
val data = UInt(64.W) val data = UInt(64.W)
} }
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String) class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String,
traceHasSource: Boolean)
extends LazyModuleImp(outer) extends LazyModuleImp(outer)
with UnitTestModule { with UnitTestModule {
// Current cycle mark to read from trace // Current cycle mark to read from trace
@@ -1119,7 +1138,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
// Are we safe to read the next warp? // Are we safe to read the next warp?
val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _) val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _)
val sim = Module(new SimMemTrace(filename, config.numLanes)) val sim = Module(new SimMemTrace(filename, config.numLanes, traceHasSource))
sim.io.clock := clock sim.io.clock := clock
sim.io.reset := reset.asBool sim.io.reset := reset.asBool
// 'sim.io.trace_ready.ready' is a ready signal going into the DPI sim, // 'sim.io.trace_ready.ready' is a ready signal going into the DPI sim,
@@ -1155,12 +1174,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
reqQ.io.enq.bits := req // FIXME duplicate valid reqQ.io.enq.bits := req // FIXME duplicate valid
} }
// To prevent collision of sourceId with a current in-flight message,
// just use a counter that increments indefinitely as the sourceId of new
// messages.
val sourceIdCounter = RegInit(0.U(64.W))
sourceIdCounter := sourceIdCounter + 1.U
// Issue here is that Vortex mem range is not within Chipyard Mem range // Issue here is that Vortex mem range is not within Chipyard Mem range
// In default setting, all mem-req for program data must be within // In default setting, all mem-req for program data must be within
// 0X80000000 -> 0X90000000 // 0X80000000 -> 0X90000000
@@ -1193,22 +1206,27 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W) val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
val wordAlignedSize = Mux(subword, 2.U, req.size) val wordAlignedSize = Mux(subword, 2.U, req.size)
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds),
ignoreInUse = false))
sourceGen.io.gen := reqQ.io.deq.fire
// assert(sourceGen.io.id.valid)
val (plegal, pbits) = edge.Put( val (plegal, pbits) = edge.Put(
fromSource = sourceIdCounter, fromSource = sourceGen.io.id.bits,
toAddress = hashToValidPhyAddr(wordAlignedAddress), toAddress = hashToValidPhyAddr(wordAlignedAddress),
lgSize = wordAlignedSize, // trace line already holds log2(size) lgSize = wordAlignedSize, // trace line already holds log2(size)
// data should be aligned to beatBytes // data should be aligned to beatBytes
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
) )
val (glegal, gbits) = edge.Get( val (glegal, gbits) = edge.Get(
fromSource = sourceIdCounter, fromSource = sourceGen.io.id.bits,
toAddress = hashToValidPhyAddr(wordAlignedAddress), toAddress = hashToValidPhyAddr(wordAlignedAddress),
lgSize = wordAlignedSize lgSize = wordAlignedSize
) )
val legal = Mux(req.is_store, plegal, glegal) val legal = Mux(req.is_store, plegal, glegal)
val bits = Mux(req.is_store, pbits, gbits) val bits = Mux(req.is_store, pbits, gbits)
tlOut.a.valid := reqQ.io.deq.valid tlOut.a.valid := (reqQ.io.deq.valid && sourceGen.io.id.valid)
when (tlOut.a.valid) { when (tlOut.a.valid) {
assert(legal, "illegal TL req gen") assert(legal, "illegal TL req gen")
} }
@@ -1218,6 +1236,10 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
tlOut.d.ready := true.B tlOut.d.ready := true.B
tlOut.e.valid := false.B tlOut.e.valid := false.B
// Reclaim source id on response
sourceGen.io.reclaim.valid := tlOut.d.valid
sourceGen.io.reclaim.bits := tlOut.d.bits.source
// debug // debug
when(tlOut.a.valid) { when(tlOut.a.valid) {
TLPrintf( TLPrintf(
@@ -1251,10 +1273,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
} }
} }
class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
class SimMemTrace(filename: String, numLanes: Int)
extends BlackBox( extends BlackBox(
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes) Map("FILENAME" -> filename,
"NUM_LANES" -> numLanes,
"HAS_SOURCE" -> (if (traceHasSource) 1 else 0))
) )
with HasBlackBoxResource { with HasBlackBoxResource {
val traceLineT = new TraceLine val traceLineT = new TraceLine
@@ -1406,11 +1429,13 @@ class MemTraceLogger(
// This assert only holds true for PutFullData and not PutPartialData, // This assert only holds true for PutFullData and not PutPartialData,
// where HIGH bits in the mask may not be contiguous. // where HIGH bits in the mask may not be contiguous.
assert( when (tlIn.a.valid) {
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size), assert(
"mask HIGH popcount do not match the TL size. " + PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
"Partial masks are not allowed for PutFull" "mask HIGH popcount do not match the TL size. " +
) "Partial masks are not allowed for PutFull"
)
}
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask) val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
val dataW = tlIn.params.dataBits val dataW = tlIn.params.dataBits
val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U)) val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U))

View File

@@ -1,8 +1,8 @@
package freechips.rocketchip.tilelink package freechips.rocketchip.tilelink
import freechips.rocketchip.diplomacy._ import freechips.rocketchip.diplomacy.LazyModule
import freechips.rocketchip.subsystem.{BaseSubsystem} import freechips.rocketchip.subsystem.BaseSubsystem
import org.chipsalliance.cde.config.{Parameters, Config} import org.chipsalliance.cde.config.Parameters
// The trait is attached to DigitalTop of Chipyard system, informing it indeed // The trait is attached to DigitalTop of Chipyard system, informing it indeed
// has the ability to attach GPU tracer node onto the system bus // has the ability to attach GPU tracer node onto the system bus
@@ -13,20 +13,14 @@ trait CanHaveMemtraceCore { this: BaseSubsystem =>
// Safe to use get as WithMemtraceCore requires WithNLanes to be defined // Safe to use get as WithMemtraceCore requires WithNLanes to be defined
val simtParam = p(SIMTCoreKey).get val simtParam = p(SIMTCoreKey).get
val config = defaultConfig.copy(numLanes = simtParam.nLanes) val config = defaultConfig.copy(numLanes = simtParam.nLanes)
val tracer = LazyModule(new MemTraceDriver(config, param.tracefilename)(p)) val tracer = LazyModule(
new MemTraceDriver(config, param.tracefilename, param.traceHasSource)(p)
)
// Must use :=* to ensure the N edges from Tracer don't get merged into 1 // Must use :=* to ensure the N edges from Tracer don't get merged into 1
// when connecting to SBus // when connecting to SBus
println(s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]") println(
s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]"
)
sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node
} }
} }
//This is used by Chip Level Config, the config which creates the SoC
class WithMemtraceCore(tracefilename: String)
extends Config((site, _, _) => { case MemtraceCoreKey =>
require(
site(SIMTCoreKey).isDefined,
"Memtrace core requires a SIMT configuration. Use WithNLanes to enable SIMT."
)
Some(MemtraceCoreParams(tracefilename))
})