Merge remote-tracking branch 'origin/graphics' into local-dev-branch
This commit is contained in:
@@ -152,7 +152,7 @@ MemTraceLine MemTraceReader::read_trace_at(const long cycle, const int lane_id,
|
|||||||
assert(!"unreachable");
|
assert(!"unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void memtrace_init(const char *filename) {
|
extern "C" void memtrace_init(const char *filename, bool has_source) {
|
||||||
#ifndef NO_VPI
|
#ifndef NO_VPI
|
||||||
s_vpi_vlog_info info;
|
s_vpi_vlog_info info;
|
||||||
if (!vpi_get_vlog_info(&info)) {
|
if (!vpi_get_vlog_info(&info)) {
|
||||||
@@ -175,7 +175,7 @@ extern "C" void memtrace_init(const char *filename) {
|
|||||||
reader = std::make_unique<MemTraceReader>(filename);
|
reader = std::make_unique<MemTraceReader>(filename);
|
||||||
// parse file upfront
|
// parse file upfront
|
||||||
// driver trace file is assumed to not have source id
|
// driver trace file is assumed to not have source id
|
||||||
reader->parse(false);
|
reader->parse(has_source);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: accept core_id as well
|
// TODO: accept core_id as well
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ public:
|
|||||||
FILE *outfile;
|
FILE *outfile;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern "C" void memtrace_init(const char *filename);
|
extern "C" void memtrace_init(const char *filename, bool has_source);
|
||||||
extern "C" void memtrace_query(unsigned char trace_read_ready,
|
extern "C" void memtrace_query(unsigned char trace_read_ready,
|
||||||
unsigned long trace_read_cycle,
|
unsigned long trace_read_cycle,
|
||||||
int trace_read_lane_id,
|
int trace_read_lane_id,
|
||||||
|
|||||||
@@ -4,7 +4,8 @@
|
|||||||
`define LOGSIZE_WIDTH 8
|
`define LOGSIZE_WIDTH 8
|
||||||
|
|
||||||
import "DPI-C" function void memtrace_init(
|
import "DPI-C" function void memtrace_init(
|
||||||
input string filename
|
input string filename,
|
||||||
|
input bit has_source
|
||||||
);
|
);
|
||||||
|
|
||||||
// Make sure to sync the parameters for:
|
// Make sure to sync the parameters for:
|
||||||
@@ -24,7 +25,9 @@ import "DPI-C" function void memtrace_query
|
|||||||
output bit trace_read_finished
|
output bit trace_read_finished
|
||||||
);
|
);
|
||||||
|
|
||||||
module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
module SimMemTrace #(parameter FILENAME = "undefined",
|
||||||
|
NUM_LANES = 4,
|
||||||
|
HAS_SOURCE = 0) (
|
||||||
input clock,
|
input clock,
|
||||||
input reset,
|
input reset,
|
||||||
|
|
||||||
@@ -61,7 +64,7 @@ module SimMemTrace #(parameter FILENAME = "undefined", NUM_LANES = 4) (
|
|||||||
|
|
||||||
initial begin
|
initial begin
|
||||||
/* $value$plusargs("uartlog=%s", __uartlog); */
|
/* $value$plusargs("uartlog=%s", __uartlog); */
|
||||||
memtrace_init(FILENAME);
|
memtrace_init(FILENAME, HAS_SOURCE);
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clock) begin
|
always @(posedge clock) begin
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import freechips.rocketchip.unittest._
|
|||||||
|
|
||||||
// TODO: find better place for these
|
// TODO: find better place for these
|
||||||
case class SIMTCoreParams(nLanes: Int = 4)
|
case class SIMTCoreParams(nLanes: Int = 4)
|
||||||
case class MemtraceCoreParams(tracefilename: String = "undefined")
|
case class MemtraceCoreParams(tracefilename: String = "undefined", traceHasSource: Boolean = false)
|
||||||
|
|
||||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/)
|
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/)
|
||||||
case object MemtraceCoreKey extends Field[Option[MemtraceCoreParams]](None /*default*/)
|
case object MemtraceCoreKey extends Field[Option[MemtraceCoreParams]](None /*default*/)
|
||||||
@@ -81,7 +81,7 @@ object defaultConfig extends CoalescerConfig(
|
|||||||
wordSizeInBytes = 4,
|
wordSizeInBytes = 4,
|
||||||
wordWidth = 2,
|
wordWidth = 2,
|
||||||
// when attaching to SoC, 16 source IDs are not enough due to longer latency
|
// when attaching to SoC, 16 source IDs are not enough due to longer latency
|
||||||
numOldSrcIds = 64,
|
numOldSrcIds = 16,
|
||||||
numNewSrcIds = 4,
|
numNewSrcIds = 4,
|
||||||
respQueueDepth = 4,
|
respQueueDepth = 4,
|
||||||
coalLogSizes = Seq(3),
|
coalLogSizes = Seq(3),
|
||||||
@@ -187,19 +187,33 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, maxSize: Int) extends Bun
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ReqSourceGen(sourceWidth: Int) extends Module {
|
// If `ignoreInUse`, just keep giving out new IDs without checking if it is in
|
||||||
|
// use.
|
||||||
|
class RoundRobinSourceGenerator(sourceWidth: Int, ignoreInUse: Boolean = true) extends Module {
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val gen = Input(Bool())
|
val gen = Input(Bool())
|
||||||
|
val reclaim = Input(Valid(UInt(sourceWidth.W)))
|
||||||
val id = Output(Valid(UInt(sourceWidth.W)))
|
val id = Output(Valid(UInt(sourceWidth.W)))
|
||||||
})
|
})
|
||||||
|
|
||||||
val head = RegInit(UInt(sourceWidth.W), 0.U)
|
val head = RegInit(UInt(sourceWidth.W), 0.U)
|
||||||
|
|
||||||
head := Mux(io.gen, head + 1.U, head)
|
head := Mux(io.gen, head + 1.U, head)
|
||||||
|
|
||||||
// FIXME: keep track of ones in use & set invalid when out
|
val numSourceId = 1 << sourceWidth
|
||||||
io.id.valid := true.B
|
// true: in use, false: available
|
||||||
|
val occupancyTable = Mem(numSourceId, Valid(UInt(sourceWidth.W)))
|
||||||
|
when(reset.asBool) {
|
||||||
|
(0 until numSourceId).foreach { i => occupancyTable(i).valid := false.B }
|
||||||
|
}
|
||||||
|
|
||||||
|
io.id.valid := (if (ignoreInUse) true.B else !occupancyTable(head).valid)
|
||||||
io.id.bits := head
|
io.id.bits := head
|
||||||
|
when (io.gen && io.id.valid /* fire */) {
|
||||||
|
occupancyTable(io.id.bits).valid := true.B // mark in use
|
||||||
|
}
|
||||||
|
when (io.reclaim.valid) {
|
||||||
|
occupancyTable(io.reclaim.bits).valid := false.B // mark freed
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module {
|
class CoalShiftQueue[T <: Data](gen: T, entries: Int, config: CoalescerConfig) extends Module {
|
||||||
@@ -545,8 +559,10 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
|
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numNewSrcIds)))
|
||||||
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
|
sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
|
||||||
|
sourceGen.io.reclaim.valid := false.B // not used
|
||||||
|
sourceGen.io.reclaim.bits := DontCare // not used
|
||||||
|
|
||||||
val coalesceValid = chosenValid && sourceGen.io.id.valid
|
val coalesceValid = chosenValid && sourceGen.io.id.valid
|
||||||
|
|
||||||
@@ -1062,9 +1078,11 @@ object TLUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit
|
// `traceHasSource` is true if the input trace file has an additional source
|
||||||
p: Parameters
|
// ID column. This is useful for using the output trace file generated by
|
||||||
) extends LazyModule {
|
// MemTraceLogger as the driver.
|
||||||
|
class MemTraceDriver(config: CoalescerConfig, filename: String, traceHasSource: Boolean = false)
|
||||||
|
(implicit p: Parameters) extends LazyModule {
|
||||||
// Create N client nodes together
|
// Create N client nodes together
|
||||||
val laneNodes = Seq.tabulate(config.numLanes) { i =>
|
val laneNodes = Seq.tabulate(config.numLanes) { i =>
|
||||||
val clientParam = Seq(
|
val clientParam = Seq(
|
||||||
@@ -1082,7 +1100,7 @@ class MemTraceDriver(config: CoalescerConfig, filename: String)(implicit
|
|||||||
val node = TLIdentityNode()
|
val node = TLIdentityNode()
|
||||||
laneNodes.foreach { l => node := l }
|
laneNodes.foreach { l => node := l }
|
||||||
|
|
||||||
lazy val module = new MemTraceDriverImp(this, config, filename)
|
lazy val module = new MemTraceDriverImp(this, config, filename, traceHasSource)
|
||||||
}
|
}
|
||||||
|
|
||||||
trait HasTraceLine {
|
trait HasTraceLine {
|
||||||
@@ -1105,7 +1123,8 @@ class TraceLine extends Bundle with HasTraceLine {
|
|||||||
val data = UInt(64.W)
|
val data = UInt(64.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String)
|
class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename: String,
|
||||||
|
traceHasSource: Boolean)
|
||||||
extends LazyModuleImp(outer)
|
extends LazyModuleImp(outer)
|
||||||
with UnitTestModule {
|
with UnitTestModule {
|
||||||
// Current cycle mark to read from trace
|
// Current cycle mark to read from trace
|
||||||
@@ -1119,7 +1138,7 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
|
|||||||
// Are we safe to read the next warp?
|
// Are we safe to read the next warp?
|
||||||
val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _)
|
val reqQueueAllReady = reqQueues.map(_.io.enq.ready).reduce(_ && _)
|
||||||
|
|
||||||
val sim = Module(new SimMemTrace(filename, config.numLanes))
|
val sim = Module(new SimMemTrace(filename, config.numLanes, traceHasSource))
|
||||||
sim.io.clock := clock
|
sim.io.clock := clock
|
||||||
sim.io.reset := reset.asBool
|
sim.io.reset := reset.asBool
|
||||||
// 'sim.io.trace_ready.ready' is a ready signal going into the DPI sim,
|
// 'sim.io.trace_ready.ready' is a ready signal going into the DPI sim,
|
||||||
@@ -1155,12 +1174,6 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
|
|||||||
reqQ.io.enq.bits := req // FIXME duplicate valid
|
reqQ.io.enq.bits := req // FIXME duplicate valid
|
||||||
}
|
}
|
||||||
|
|
||||||
// To prevent collision of sourceId with a current in-flight message,
|
|
||||||
// just use a counter that increments indefinitely as the sourceId of new
|
|
||||||
// messages.
|
|
||||||
val sourceIdCounter = RegInit(0.U(64.W))
|
|
||||||
sourceIdCounter := sourceIdCounter + 1.U
|
|
||||||
|
|
||||||
// Issue here is that Vortex mem range is not within Chipyard Mem range
|
// Issue here is that Vortex mem range is not within Chipyard Mem range
|
||||||
// In default setting, all mem-req for program data must be within
|
// In default setting, all mem-req for program data must be within
|
||||||
// 0X80000000 -> 0X90000000
|
// 0X80000000 -> 0X90000000
|
||||||
@@ -1193,22 +1206,27 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
|
|||||||
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
|
val wordAlignedAddress = req.address & ~((1 << log2Ceil(config.wordSizeInBytes)) - 1).U(addrW.W)
|
||||||
val wordAlignedSize = Mux(subword, 2.U, req.size)
|
val wordAlignedSize = Mux(subword, 2.U, req.size)
|
||||||
|
|
||||||
|
val sourceGen = Module(new RoundRobinSourceGenerator(log2Ceil(config.numOldSrcIds),
|
||||||
|
ignoreInUse = false))
|
||||||
|
sourceGen.io.gen := reqQ.io.deq.fire
|
||||||
|
// assert(sourceGen.io.id.valid)
|
||||||
|
|
||||||
val (plegal, pbits) = edge.Put(
|
val (plegal, pbits) = edge.Put(
|
||||||
fromSource = sourceIdCounter,
|
fromSource = sourceGen.io.id.bits,
|
||||||
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
||||||
lgSize = wordAlignedSize, // trace line already holds log2(size)
|
lgSize = wordAlignedSize, // trace line already holds log2(size)
|
||||||
// data should be aligned to beatBytes
|
// data should be aligned to beatBytes
|
||||||
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
|
data = (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
|
||||||
)
|
)
|
||||||
val (glegal, gbits) = edge.Get(
|
val (glegal, gbits) = edge.Get(
|
||||||
fromSource = sourceIdCounter,
|
fromSource = sourceGen.io.id.bits,
|
||||||
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
toAddress = hashToValidPhyAddr(wordAlignedAddress),
|
||||||
lgSize = wordAlignedSize
|
lgSize = wordAlignedSize
|
||||||
)
|
)
|
||||||
val legal = Mux(req.is_store, plegal, glegal)
|
val legal = Mux(req.is_store, plegal, glegal)
|
||||||
val bits = Mux(req.is_store, pbits, gbits)
|
val bits = Mux(req.is_store, pbits, gbits)
|
||||||
|
|
||||||
tlOut.a.valid := reqQ.io.deq.valid
|
tlOut.a.valid := (reqQ.io.deq.valid && sourceGen.io.id.valid)
|
||||||
when (tlOut.a.valid) {
|
when (tlOut.a.valid) {
|
||||||
assert(legal, "illegal TL req gen")
|
assert(legal, "illegal TL req gen")
|
||||||
}
|
}
|
||||||
@@ -1218,6 +1236,10 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
|
|||||||
tlOut.d.ready := true.B
|
tlOut.d.ready := true.B
|
||||||
tlOut.e.valid := false.B
|
tlOut.e.valid := false.B
|
||||||
|
|
||||||
|
// Reclaim source id on response
|
||||||
|
sourceGen.io.reclaim.valid := tlOut.d.valid
|
||||||
|
sourceGen.io.reclaim.bits := tlOut.d.bits.source
|
||||||
|
|
||||||
// debug
|
// debug
|
||||||
when(tlOut.a.valid) {
|
when(tlOut.a.valid) {
|
||||||
TLPrintf(
|
TLPrintf(
|
||||||
@@ -1251,10 +1273,11 @@ class MemTraceDriverImp(outer: MemTraceDriver, config: CoalescerConfig, filename
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class SimMemTrace(filename: String, numLanes: Int, traceHasSource: Boolean)
|
||||||
class SimMemTrace(filename: String, numLanes: Int)
|
|
||||||
extends BlackBox(
|
extends BlackBox(
|
||||||
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
|
Map("FILENAME" -> filename,
|
||||||
|
"NUM_LANES" -> numLanes,
|
||||||
|
"HAS_SOURCE" -> (if (traceHasSource) 1 else 0))
|
||||||
)
|
)
|
||||||
with HasBlackBoxResource {
|
with HasBlackBoxResource {
|
||||||
val traceLineT = new TraceLine
|
val traceLineT = new TraceLine
|
||||||
@@ -1406,11 +1429,13 @@ class MemTraceLogger(
|
|||||||
|
|
||||||
// This assert only holds true for PutFullData and not PutPartialData,
|
// This assert only holds true for PutFullData and not PutPartialData,
|
||||||
// where HIGH bits in the mask may not be contiguous.
|
// where HIGH bits in the mask may not be contiguous.
|
||||||
assert(
|
when (tlIn.a.valid) {
|
||||||
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
|
assert(
|
||||||
"mask HIGH popcount do not match the TL size. " +
|
PopCount(tlIn.a.bits.mask) === (1.U << tlIn.a.bits.size),
|
||||||
"Partial masks are not allowed for PutFull"
|
"mask HIGH popcount do not match the TL size. " +
|
||||||
)
|
"Partial masks are not allowed for PutFull"
|
||||||
|
)
|
||||||
|
}
|
||||||
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
|
val trailingZerosInMask = trailingZeros(tlIn.a.bits.mask)
|
||||||
val dataW = tlIn.params.dataBits
|
val dataW = tlIn.params.dataBits
|
||||||
val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U))
|
val mask = ~(~(0.U(dataW.W)) << ((1.U << tlIn.a.bits.size) * 8.U))
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
package freechips.rocketchip.tilelink
|
package freechips.rocketchip.tilelink
|
||||||
|
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy.LazyModule
|
||||||
import freechips.rocketchip.subsystem.{BaseSubsystem}
|
import freechips.rocketchip.subsystem.BaseSubsystem
|
||||||
import org.chipsalliance.cde.config.{Parameters, Config}
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
|
||||||
// The trait is attached to DigitalTop of Chipyard system, informing it indeed
|
// The trait is attached to DigitalTop of Chipyard system, informing it indeed
|
||||||
// has the ability to attach GPU tracer node onto the system bus
|
// has the ability to attach GPU tracer node onto the system bus
|
||||||
@@ -13,20 +13,14 @@ trait CanHaveMemtraceCore { this: BaseSubsystem =>
|
|||||||
// Safe to use get as WithMemtraceCore requires WithNLanes to be defined
|
// Safe to use get as WithMemtraceCore requires WithNLanes to be defined
|
||||||
val simtParam = p(SIMTCoreKey).get
|
val simtParam = p(SIMTCoreKey).get
|
||||||
val config = defaultConfig.copy(numLanes = simtParam.nLanes)
|
val config = defaultConfig.copy(numLanes = simtParam.nLanes)
|
||||||
val tracer = LazyModule(new MemTraceDriver(config, param.tracefilename)(p))
|
val tracer = LazyModule(
|
||||||
|
new MemTraceDriver(config, param.tracefilename, param.traceHasSource)(p)
|
||||||
|
)
|
||||||
// Must use :=* to ensure the N edges from Tracer don't get merged into 1
|
// Must use :=* to ensure the N edges from Tracer don't get merged into 1
|
||||||
// when connecting to SBus
|
// when connecting to SBus
|
||||||
println(s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]")
|
println(
|
||||||
|
s"============ MemTraceDriver instantiated [filename=${param.tracefilename}]"
|
||||||
|
)
|
||||||
sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node
|
sbus.fromPort(Some("gpu-tracer"))() :=* tracer.node
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//This is used by Chip Level Config, the config which creates the SoC
|
|
||||||
class WithMemtraceCore(tracefilename: String)
|
|
||||||
extends Config((site, _, _) => { case MemtraceCoreKey =>
|
|
||||||
require(
|
|
||||||
site(SIMTCoreKey).isDefined,
|
|
||||||
"Memtrace core requires a SIMT configuration. Use WithNLanes to enable SIMT."
|
|
||||||
)
|
|
||||||
Some(MemtraceCoreParams(tracefilename))
|
|
||||||
})
|
|
||||||
|
|||||||
Reference in New Issue
Block a user