Merge remote-tracking branch 'origin/main' into tetheredsim

This commit is contained in:
Jerry Zhao
2023-05-31 21:55:09 -07:00
52 changed files with 857 additions and 670 deletions

View File

@@ -72,17 +72,18 @@ object SpikeCosim
})
cosim.io.hartid := hartid.U
for (i <- 0 until trace.numInsns) {
cosim.io.trace(i).valid := trace.insns(i).valid
val insn = trace.trace.insns(i)
cosim.io.trace(i).valid := insn.valid
val signed = Wire(SInt(64.W))
signed := trace.insns(i).iaddr.asSInt
signed := insn.iaddr.asSInt
cosim.io.trace(i).iaddr := signed.asUInt
cosim.io.trace(i).insn := trace.insns(i).insn
cosim.io.trace(i).exception := trace.insns(i).exception
cosim.io.trace(i).interrupt := trace.insns(i).interrupt
cosim.io.trace(i).cause := trace.insns(i).cause
cosim.io.trace(i).has_wdata := trace.insns(i).wdata.isDefined.B
cosim.io.trace(i).wdata := trace.insns(i).wdata.getOrElse(0.U)
cosim.io.trace(i).priv := trace.insns(i).priv
cosim.io.trace(i).insn := insn.insn
cosim.io.trace(i).exception := insn.exception
cosim.io.trace(i).interrupt := insn.interrupt
cosim.io.trace(i).cause := insn.cause
cosim.io.trace(i).has_wdata := insn.wdata.isDefined.B
cosim.io.trace(i).wdata := insn.wdata.getOrElse(0.U)
cosim.io.trace(i).priv := insn.priv
}
}
}

View File

@@ -62,6 +62,7 @@ case class SpikeCoreParams() extends CoreParams {
val useBitManipCrypto = false
val useCryptoNIST = false
val useCryptoSM = false
val useConditionalZero = false
override def vLen = 128
override def vMemDataBits = 128

View File

@@ -38,9 +38,9 @@ class WithPLLSelectorDividerClockGenerator extends OverrideLazyIOBinder({
val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes)) }
val pllCtrl = system.prci_ctrl_domain { LazyModule(new FakePLLCtrl (baseAddress + 0x40000, tlbus.beatBytes)) }
tlbus.toVariableWidthSlave(Some("clock-div-ctrl")) { clockDivider.tlNode := TLBuffer() }
tlbus.toVariableWidthSlave(Some("clock-sel-ctrl")) { clockSelector.tlNode := TLBuffer() }
tlbus.toVariableWidthSlave(Some("pll-ctrl")) { pllCtrl.tlNode := TLBuffer() }
clockDivider.tlNode := system.prci_ctrl_bus
clockSelector.tlNode := system.prci_ctrl_bus
pllCtrl.tlNode := system.prci_ctrl_bus
system.allClockGroupsNode := clockDivider.clockNode := clockSelector.clockNode

View File

@@ -23,9 +23,9 @@ object ClockGroupCombiner {
case object ClockGroupCombinerKey extends Field[Seq[(String, ClockSinkParameters => Boolean)]](Nil)
// All clock groups with a name containing any substring in names will be combined into a single clock group
class WithClockGroupsCombinedByName(groups: (String, Seq[String])*) extends Config((site, here, up) => {
case ClockGroupCombinerKey => groups.map { case (grouped_name, matched_names) =>
(grouped_name, (m: ClockSinkParameters) => matched_names.map(n => m.name.get.contains(n)).reduce(_||_))
class WithClockGroupsCombinedByName(groups: (String, Seq[String], Seq[String])*) extends Config((site, here, up) => {
case ClockGroupCombinerKey => groups.map { case (grouped_name, matched_names, unmatched_names) =>
(grouped_name, (m: ClockSinkParameters) => matched_names.exists(n => m.name.get.contains(n)) && !unmatched_names.exists(n => m.name.get.contains(n)))
}
})

View File

@@ -36,6 +36,14 @@ trait HasChipyardPRCI { this: BaseSubsystem with InstantiatesTiles =>
val prci_ctrl_domain = LazyModule(new ClockSinkDomain(name=Some("chipyard-prci-control")))
prci_ctrl_domain.clockNode := tlbus.fixedClockNode
val prci_ctrl_bus = prci_ctrl_domain { TLXbar() }
tlbus.coupleTo("prci_ctrl") { (prci_ctrl_bus
:= TLFIFOFixer(TLFIFOFixer.all)
:= TLFragmenter(tlbus.beatBytes, tlbus.blockBytes)
:= TLBuffer()
:= _)
}
// Aggregate all the clock groups into a single node
val aggregator = LazyModule(new ClockGroupAggregator("allClocks")).node
val allClockGroupsNode = ClockGroupEphemeralNode()
@@ -71,19 +79,24 @@ trait HasChipyardPRCI { this: BaseSubsystem with InstantiatesTiles =>
// diplomatic IOBinder should drive
val frequencySpecifier = ClockGroupFrequencySpecifier(p(ClockFrequencyAssignersKey))
val clockGroupCombiner = ClockGroupCombiner()
val resetSynchronizer = ClockGroupResetSynchronizer()
val tileClockGater = if (prciParams.enableTileClockGating) { prci_ctrl_domain {
TileClockGater(prciParams.baseAddress + 0x00000, tlbus)
} } else { ClockGroupEphemeralNode() }
val tileResetSetter = if (prciParams.enableTileResetSetting) { prci_ctrl_domain {
TileResetSetter(prciParams.baseAddress + 0x10000, tlbus, tile_prci_domains.map(_.tile_reset_domain.clockNode.portParams(0).name.get), Nil)
} } else { ClockGroupEphemeralNode() }
val resetSynchronizer = prci_ctrl_domain { ClockGroupResetSynchronizer() }
val tileClockGater = Option.when(prciParams.enableTileClockGating) { prci_ctrl_domain {
val clock_gater = LazyModule(new TileClockGater(prciParams.baseAddress + 0x00000, tlbus.beatBytes))
clock_gater.tlNode := prci_ctrl_bus
clock_gater
} }
val tileResetSetter = Option.when(prciParams.enableTileResetSetting) { prci_ctrl_domain {
val reset_setter = LazyModule(new TileResetSetter(prciParams.baseAddress + 0x10000, tlbus.beatBytes,
tile_prci_domains.map(_.tile_reset_domain.clockNode.portParams(0).name.get), Nil))
reset_setter.tlNode := prci_ctrl_bus
reset_setter
} }
(aggregator
:= frequencySpecifier
:= clockGroupCombiner
:= resetSynchronizer
:= tileClockGater
:= tileResetSetter
:= tileClockGater.map(_.clockNode).getOrElse(ClockGroupEphemeralNode()(ValName("temp")))
:= tileResetSetter.map(_.clockNode).getOrElse(ClockGroupEphemeralNode()(ValName("temp")))
:= allClockGroupsNode)
}

View File

@@ -26,20 +26,26 @@ class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8)(implicit
val sinks = clockNode.out.head._1.member.elements.toSeq
require (sources.size == sinks.size)
val nSinks = sinks.size
// The implicit clock of this module is the clock of the tilelink bus
// busReset is sync'd to that clock, and will be asserted longer than the
// resets coming in through the clockNode, since the busReset is derived from
// the clockNode resets in downstream PRCI nodes
val busReset = reset
val regs = (0 until nSinks) .map { i =>
val sinkName = sinks(i)._1
val asyncReset = sources(i).reset
val reg = withReset (asyncReset) {
Module(new AsyncResetRegVec(w=divBits, init=0))
}
val reg = Module(new AsyncResetRegVec(w=divBits, init=0))
println(s"${(address+i*4).toString(16)}: Clock domain $sinkName divider")
sinks(i)._2.clock := withClockAndReset(sources(i).clock, asyncReset) {
val divider = Module(new testchipip.ClockDivideOrPass(divBits, depth = 3, genClockGate = p(ClockGateImpl)))
divider.io.divisor := reg.io.q
divider.io.resetAsync := ResetStretcher(sources(i).clock, asyncReset, 20).asAsyncReset
divider.io.clockOut
}
val divider = Module(new testchipip.ClockDivideOrPass(divBits, depth = 3, genClockGate = p(ClockGateImpl)))
divider.io.clockIn := sources(i).clock
// busReset is expected to be high for a long time, since reset will take a while to propagate
// to the TL bus. While reset is propagating, make sure we propagate a fast, undivided clock
// by setting divisor=0. The divisor signal into the ClockDividerOrPass is synchronized internally
divider.io.divisor := Mux(busReset.asBool, 0.U, reg.io.q)
divider.io.resetAsync := ResetStretcher(sources(i).clock, asyncReset, 20).asAsyncReset
sinks(i)._2.clock := divider.io.clockOut
// Note this is not synchronized to the output clock, which takes time to appear
// so this is still asyncreset

View File

@@ -46,10 +46,3 @@ class TileClockGater(address: BigInt, beatBytes: Int)(implicit p: Parameters, va
}
}
object TileClockGater {
def apply(address: BigInt, tlbus: TLBusWrapper)(implicit p: Parameters, v: ValName) = {
val gater = LazyModule(new TileClockGater(address, tlbus.beatBytes))
tlbus.toVariableWidthSlave(Some("clock-gater")) { gater.tlNode := TLBuffer() }
gater.clockNode
}
}

View File

@@ -62,12 +62,3 @@ class TileResetSetter(address: BigInt, beatBytes: Int, tileNames: Seq[String], i
}
}
}
object TileResetSetter {
def apply(address: BigInt, tlbus: TLBusWrapper, tileNames: Seq[String], initResetHarts: Seq[Int])(implicit p: Parameters, v: ValName) = {
val setter = LazyModule(new TileResetSetter(address, tlbus.beatBytes, tileNames, initResetHarts))
tlbus.toVariableWidthSlave(Some("tile-reset-setter")) { setter.tlNode := TLBuffer() }
setter.clockNode
}
}

View File

@@ -48,7 +48,7 @@ class AbstractConfig extends Config(
// By default, punch out IOs to the Harness
new chipyard.clocking.WithPassthroughClockGenerator ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "mbus", "pbus", "fbus", "cbus", "implicit"))) ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "mbus", "pbus", "fbus", "cbus", "implicit"), Seq("tile"))) ++
new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Default 500 MHz pbus
new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Default 500 MHz mbus

View File

@@ -40,7 +40,7 @@ class ChipLikeRocketConfig extends Config(
new chipyard.clocking.WithPLLSelectorDividerClockGenerator ++ // Use a PLL-based clock selector/divider generator structure
// Create the uncore clock group
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("implicit", "sbus", "mbus", "cbus", "system_bus", "fbus", "pbus"))) ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("implicit", "sbus", "mbus", "cbus", "system_bus", "fbus", "pbus"), Nil)) ++
new chipyard.config.AbstractConfig)

View File

@@ -78,8 +78,8 @@ class MulticlockRocketConfig extends Config(
new freechips.rocketchip.subsystem.WithNBigCores(1) ++
// Frequency specifications
new chipyard.config.WithTileFrequency(1000.0) ++ // Matches the maximum frequency of U540
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit")),
("periphery", Seq("pbus", "fbus"))) ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit"), Nil),
("periphery", Seq("pbus", "fbus"), Nil)) ++
new chipyard.config.WithSystemBusFrequency(500.0) ++ // Matches the maximum frequency of U540
new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Matches the maximum frequency of U540
new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Matches the maximum frequency of U540

View File

@@ -11,7 +11,7 @@ class AbstractTraceGenConfig extends Config(
new chipyard.iobinders.WithAXI4MemPunchthrough ++
new chipyard.iobinders.WithTraceGenSuccessPunchthrough ++
new chipyard.clocking.WithPassthroughClockGenerator ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "implicit"))) ++
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "implicit"), Nil)) ++
new chipyard.config.WithTracegenSystem ++
new chipyard.config.WithNoSubsystemDrivenClocks ++
new chipyard.config.WithMemoryBusFrequency(1000.0) ++

View File

@@ -7,7 +7,7 @@ import freechips.rocketchip.diplomacy._
import freechips.rocketchip.prci._
import freechips.rocketchip.util._
import freechips.rocketchip.devices.debug.{ExportDebug, JtagDTMKey, Debug}
import freechips.rocketchip.tilelink.{TLBuffer}
import freechips.rocketchip.tilelink.{TLBuffer, TLFragmenter}
import chipyard.{BuildSystem, DigitalTop}
import chipyard.clocking._
import chipyard.iobinders.{IOCellKey, JTAGChipIO}
@@ -33,9 +33,9 @@ class FlatChipTop(implicit p: Parameters) extends LazyModule {
val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes)) }
val pllCtrl = system.prci_ctrl_domain { LazyModule(new FakePLLCtrl (baseAddress + 0x40000, tlbus.beatBytes)) }
tlbus.toVariableWidthSlave(Some("clock-div-ctrl")) { clockDivider.tlNode := TLBuffer() }
tlbus.toVariableWidthSlave(Some("clock-sel-ctrl")) { clockSelector.tlNode := TLBuffer() }
tlbus.toVariableWidthSlave(Some("pll-ctrl")) { pllCtrl.tlNode := TLBuffer() }
tlbus.coupleTo("clock-div-ctrl") { clockDivider.tlNode := TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := TLBuffer() := _ }
tlbus.coupleTo("clock-sel-ctrl") { clockSelector.tlNode := TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := TLBuffer() := _ }
tlbus.coupleTo("pll-ctrl") { pllCtrl.tlNode := TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := TLBuffer() := _ }
system.allClockGroupsNode := clockDivider.clockNode := clockSelector.clockNode

View File

@@ -165,17 +165,17 @@ trait CanHavePeripheryGCD { this: BaseSubsystem =>
case Some(params) => {
if (params.useAXI4) {
val gcd = LazyModule(new GCDAXI4(params, pbus.beatBytes)(p))
pbus.toSlave(Some(portName)) {
pbus.coupleTo(portName) {
gcd.node :=
AXI4Buffer () :=
TLToAXI4 () :=
// toVariableWidthSlave doesn't use holdFirstDeny, which TLToAXI4() needsx
TLFragmenter(pbus.beatBytes, pbus.blockBytes, holdFirstDeny = true)
TLFragmenter(pbus.beatBytes, pbus.blockBytes, holdFirstDeny = true) := _
}
Some(gcd)
} else {
val gcd = LazyModule(new GCDTL(params, pbus.beatBytes)(p))
pbus.toVariableWidthSlave(Some(portName)) { gcd.node }
pbus.coupleTo(portName) { gcd.node := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ }
Some(gcd)
}
}

View File

@@ -62,7 +62,7 @@ trait CanHavePeripheryInitZero { this: BaseSubsystem =>
p(InitZeroKey) .map { k =>
val initZero = LazyModule(new InitZero()(p))
fbus.fromPort(Some("init-zero"))() := initZero.node
fbus.coupleFrom("init-zero") { _ := initZero.node }
}
}

View File

@@ -67,6 +67,7 @@ case class MyCoreParams(
val useCryptoNIST: Boolean = false
val useCryptoSM: Boolean = false
val traceHasWdata: Boolean = false
val useConditionalZero = false
}
// DOC include start: CanAttachTile

View File

@@ -203,7 +203,7 @@ trait CanHavePeripheryStreamingFIR extends BaseSubsystem {
genOut = FixedPoint(8.W, 3.BP),
coeffs = Seq(1.F(0.BP), 2.F(0.BP), 3.F(0.BP)),
params = params))
pbus.toVariableWidthSlave(Some("streamingFIR")) { streamingFIR.mem.get := TLFIFOFixer() }
pbus.coupleTo("streamingFIR") { streamingFIR.mem.get := TLFIFOFixer() := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ }
Some(streamingFIR)
}
case None => None

View File

@@ -132,7 +132,7 @@ trait CanHavePeripheryStreamingPassthrough { this: BaseSubsystem =>
val passthrough = p(StreamingPassthroughKey) match {
case Some(params) => {
val streamingPassthroughChain = LazyModule(new TLStreamingPassthroughChain(params, UInt(32.W)))
pbus.toVariableWidthSlave(Some("streamingPassthrough")) { streamingPassthroughChain.mem.get := TLFIFOFixer() }
pbus.coupleTo("streamingPassthrough") { streamingPassthroughChain.mem.get := TLFIFOFixer() := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ }
Some(streamingPassthroughChain)
}
case None => None

View File

@@ -38,16 +38,22 @@ class FireSimClockBridgeInstantiator extends HarnessClockInstantiator {
var instantiatedClocks = LinkedHashMap[Int, (Clock, Seq[String])]()
// connect wires to clock source
for ((name, (freq, clock)) <- clockMap) {
val freqMHz = (freq / (1000 * 1000)).toInt
def findOrInstantiate(freqMHz: Int, name: String): Clock = {
if (!instantiatedClocks.contains(freqMHz)) {
val clock = Wire(Clock())
instantiatedClocks(freqMHz) = (clock, Seq(name))
} else {
instantiatedClocks(freqMHz) = (instantiatedClocks(freqMHz)._1, instantiatedClocks(freqMHz)._2 :+ name)
}
clock := instantiatedClocks(freqMHz)._1
instantiatedClocks(freqMHz)._1
}
for ((name, (freq, clock)) <- clockMap) {
val freqMHz = (freq / (1000 * 1000)).toInt
clock := findOrInstantiate(freqMHz, name)
}
// The undivided reference clock as calculated by pllConfig must be instantiated
findOrInstantiate(pllConfig.referenceFreqMHz.toInt, "reference")
val ratClocks = instantiatedClocks.map { case (freqMHz, (clock, names)) =>
(RationalClock(names.mkString(","), 1, pllConfig.referenceFreqMHz.toInt / freqMHz), clock)

View File

@@ -103,11 +103,15 @@ class WithFireSimDesignTweaks extends Config(
// Tweaks to modify target clock frequencies / crossings to legacy firesim defaults
class WithFireSimHighPerfClocking extends Config(
// Create clock group for uncore that does not include mbus
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "pbus", "fbus", "cbus", "implicit"), Nil)) ++
// Optional: This sets the default frequency for all buses in the system to 3.2 GHz
// (since unspecified bus frequencies will use the pbus frequency)
// This frequency selection matches FireSim's legacy selection and is required
// to support 200Gb NIC performance. You may select a smaller value.
new chipyard.config.WithPeripheryBusFrequency(3200.0) ++
new chipyard.config.WithSystemBusFrequency(3200.0) ++
new chipyard.config.WithFrontBusFrequency(3200.0) ++
// Optional: These three configs put the DRAM memory system in it's own clock domain.
// Removing the first config will result in the FASED timing model running
// at the pbus freq (above, 3.2 GHz), which is outside the range of valid DDR3 speedgrades.
@@ -164,6 +168,23 @@ class WithFireSimHighPerfConfigTweaks extends Config(
new WithFireSimDesignTweaks
)
// Tweak more representative of testchip configs
class WithFireSimTestChipConfigTweaks extends Config(
// Frequency specifications
new chipyard.config.WithTileFrequency(1000.0) ++ // Realistic tile frequency for a test chip
new chipyard.config.WithSystemBusFrequency(500.0) ++ // Realistic system bus frequency
new chipyard.config.WithMemoryBusFrequency(1000.0) ++ // Needs to be 1000 MHz to model DDR performance accurately
new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Match the sbus and pbus frequency
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "pbus", "fbus", "cbus", "implicit"), Seq("tile"))) ++
// Crossing specifications
new chipyard.config.WithCbusToPbusCrossingType(AsynchronousCrossing()) ++ // Add Async crossing between PBUS and CBUS
new chipyard.config.WithSbusToMbusCrossingType(AsynchronousCrossing()) ++ // Add Async crossings between backside of L2 and MBUS
new freechips.rocketchip.subsystem.WithRationalRocketTiles ++ // Add rational crossings between RocketTile and uncore
new boom.common.WithRationalBoomTiles ++ // Add rational crossings between BoomTile and uncore
new testchipip.WithAsynchronousSerialSlaveCrossing ++ // Add Async crossing between serial and MBUS. Its master-side is tied to the FBUS
new WithFireSimDesignTweaks
)
/*******************************************************************************
* Full TARGET_CONFIG configurations. These set parameters of the target being
* simulated.