Merge pull request #690 from ucb-bar/diplomatic-clocks-mbus-crossing

DRAM In A Separate Domain Take 2
This commit is contained in:
David Biancolin
2020-10-19 15:15:09 -04:00
committed by GitHub
14 changed files with 206 additions and 52 deletions

View File

@@ -8,12 +8,12 @@ import freechips.rocketchip.prci._
import freechips.rocketchip.subsystem.{BaseSubsystem, SubsystemDriveAsyncClockGroupsKey, InstantiatesTiles}
import freechips.rocketchip.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy.{OutwardNodeHandle, InModuleBody, LazyModule}
import freechips.rocketchip.util.{ResetCatchAndSync, Pow2ClockDivider}
import freechips.rocketchip.util.{ResetCatchAndSync}
import barstools.iocell.chisel._
import testchipip.{TLTileResetCtrl}
import chipyard.clocking.{DividerOnlyClockGenerator, ClockGroupNamePrefixer, ClockGroupFrequencySpecifier}
import chipyard.clocking._
/**
* Chipyard provides three baseline, top-level reset schemes, set using the
@@ -121,13 +121,14 @@ object ClockingSchemeGenerators {
:= ClockGroup()
:= aggregator)
(systemAsyncClockGroup
:= resetSetter
:= ClockGroupNamePrefixer()
:= aggregator)
:*= resetSetter
:*= ClockGroupNamePrefixer()
:*= aggregator)
val referenceClockSource = ClockSourceNode(Seq(ClockSourceParameters()))
(aggregator
:= ClockGroupFrequencySpecifier(p(ClockFrequencyAssignersKey), p(DefaultClockFrequencyKey))
:= ClockGroupResetSynchronizer()
:= DividerOnlyClockGenerator()
:= referenceClockSource)

View File

@@ -5,13 +5,13 @@ import chisel3.util.{log2Up}
import freechips.rocketchip.config.{Field, Parameters, Config}
import freechips.rocketchip.subsystem._
import freechips.rocketchip.diplomacy.{LazyModule, ValName}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.devices.tilelink.{BootROMLocated}
import freechips.rocketchip.devices.debug.{Debug, ExportDebug, DebugModuleKey, DMI}
import freechips.rocketchip.groundtest.{GroundTestSubsystem}
import freechips.rocketchip.tile._
import freechips.rocketchip.rocket.{RocketCoreParams, MulDivParams, DCacheParams, ICacheParams}
import freechips.rocketchip.util.{AsyncResetReg}
import freechips.rocketchip.util.{AsyncResetReg, Symmetric}
import freechips.rocketchip.prci._
import testchipip._
@@ -172,3 +172,46 @@ class WithPeripheryBusFrequencyAsDefault extends Config((site, here, up) => {
case DefaultClockFrequencyKey => (site(PeripheryBusKey).dtsFrequency.get / (1000 * 1000)).toDouble
})
/**
* Mixins to specify crossing types between the 5 traditional TL buses
*
* Note: these presuppose the legacy connections between buses and set
* parameters in SubsystemCrossingParams; they may not be resuable in custom
* topologies (but you can specify the desired crossings in your topology).
*
* @param xType The clock crossing type
*
*/
class WithSbusToMbusCrossingType(xType: ClockCrossingType) extends Config((site, here, up) => {
case SbusToMbusXTypeKey => xType
})
class WithSbusToCbusCrossingType(xType: ClockCrossingType) extends Config((site, here, up) => {
case SbusToCbusXTypeKey => xType
})
class WithCbusToPbusCrossingType(xType: ClockCrossingType) extends Config((site, here, up) => {
case CbusToPbusXTypeKey => xType
})
class WithFbusToSbusCrossingType(xType: ClockCrossingType) extends Config((site, here, up) => {
case FbusToSbusXTypeKey => xType
})
/**
* Mixins to set the dtsFrequency field of BusParams -- these will percolate its way
* up the diplomatic graph to the clock sources.
*/
class WithPeripheryBusFrequency(freqMHz: Double) extends Config((site, here, up) => {
case PeripheryBusKey => up(PeripheryBusKey).copy(dtsFrequency = Some(BigInt((freqMHz * 1e6).toLong)))
})
class WithMemoryBusFrequency(freqMHz: Double) extends Config((site, here, up) => {
case MemoryBusKey => up(MemoryBusKey).copy(dtsFrequency = Some(BigInt((freqMHz * 1e6).toLong)))
})
class WithSystemBusFrequency(freqMHz: Double) extends Config((site, here, up) => {
case SystemBusKey => up(SystemBusKey).copy(dtsFrequency = Some(BigInt((freqMHz * 1e6).toLong)))
})
class WithControlBusFrequency(freqMHz: Double) extends Config((site, here, up) => {
case ControlBusKey => up(ControlBusKey).copy(dtsFrequency = Some(BigInt((freqMHz * 1e6).toLong)))
})
class WithRationalMemoryBusCrossing extends WithSbusToMbusCrossingType(RationalCrossing(Symmetric))
class WithAsynchrousMemoryBusCrossing extends WithSbusToMbusCrossingType(AsynchronousCrossing())

View File

@@ -0,0 +1,62 @@
package chipyard
import freechips.rocketchip.config.{Field, Config, Parameters}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.{Location, Symmetric}
import freechips.rocketchip.subsystem._
// I'm putting this code here temporarily as I think it should be a candidate
// for upstreaming based on input from Henry Cook, but don't wnat to deal with
// an RC branch just yet.
// For subsystem/BusTopology.scala
/**
* Keys that serve as a means to define crossing types from a Parameters instance
*/
case object SbusToMbusXTypeKey extends Field[ClockCrossingType](NoCrossing)
case object SbusToCbusXTypeKey extends Field[ClockCrossingType](NoCrossing)
case object CbusToPbusXTypeKey extends Field[ClockCrossingType](SynchronousCrossing())
case object FbusToSbusXTypeKey extends Field[ClockCrossingType](SynchronousCrossing())
// Biancolin: This, modified from Henry's email
/** Parameterization of a topology containing a banked coherence manager and a bus for attaching memory devices. */
case class CoherentMulticlockBusTopologyParams(
sbus: SystemBusParams, // TODO remove this after better width propagation
mbus: MemoryBusParams,
l2: BankedL2Params,
sbusToMbusXType: ClockCrossingType = NoCrossing
) extends TLBusWrapperTopology(
instantiations = (if (l2.nBanks == 0) Nil else List(
(MBUS, mbus),
(L2, CoherenceManagerWrapperParams(mbus.blockBytes, mbus.beatBytes, l2.nBanks, L2.name)(l2.coherenceManager)))),
connections = if (l2.nBanks == 0) Nil else List(
(SBUS, L2, TLBusWrapperConnection(xType = NoCrossing, driveClockFromMaster = Some(true), nodeBinding = BIND_STAR)()),
(L2, MBUS, TLBusWrapperConnection.crossTo(
xType = sbusToMbusXType,
driveClockFromMaster = Some(true),
nodeBinding = BIND_QUERY))
)
)
// For subsystem/Configs.scala
class WithMulticlockCoherentBusTopology extends Config((site, here, up) => {
case TLNetworkTopologyLocated(InSubsystem) => List(
JustOneBusTopologyParams(sbus = site(SystemBusKey)),
HierarchicalBusTopologyParams(
pbus = site(PeripheryBusKey),
fbus = site(FrontBusKey),
cbus = site(ControlBusKey),
xTypes = SubsystemCrossingParams(
sbusToCbusXType = site(SbusToCbusXTypeKey),
cbusToPbusXType = site(CbusToPbusXTypeKey),
fbusToSbusXType = site(FbusToSbusXTypeKey))),
CoherentMulticlockBusTopologyParams(
sbus = site(SystemBusKey),
mbus = site(MemoryBusKey),
l2 = site(BankedL2Key),
sbusToMbusXType = site(SbusToMbusXTypeKey)))
})

View File

@@ -125,11 +125,11 @@ class WithSimNetwork extends OverrideHarnessBinder({
})
class WithSimAXIMem extends OverrideHarnessBinder({
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedIO[AXI4Bundle]]) => {
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedAndResetIO[AXI4Bundle]]) => {
val p: Parameters = chipyard.iobinders.GetSystemParameters(system)
(ports zip system.memAXI4Node.edges.in).map { case (port, edge) =>
val mem = LazyModule(new SimAXIMem(edge, size=p(ExtMem).get.master.size)(p))
withClockAndReset(port.clock, th.harnessReset) {
withClockAndReset(port.clock, port.reset) {
Module(mem.module).suggestName("mem")
}
mem.io_axi4.head <> port.bits
@@ -139,7 +139,7 @@ class WithSimAXIMem extends OverrideHarnessBinder({
})
class WithBlackBoxSimMem extends OverrideHarnessBinder({
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedIO[AXI4Bundle]]) => {
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedAndResetIO[AXI4Bundle]]) => {
val p: Parameters = chipyard.iobinders.GetSystemParameters(system)
(ports zip system.memAXI4Node.edges.in).map { case (port, edge) =>
val memSize = p(ExtMem).get.master.size
@@ -147,18 +147,18 @@ class WithBlackBoxSimMem extends OverrideHarnessBinder({
val mem = Module(new SimDRAM(memSize, lineSize, edge.bundle)).suggestName("simdram")
mem.io.axi <> port.bits
mem.io.clock := port.clock
mem.io.reset := th.harnessReset
mem.io.reset := port.reset
}
Nil
}
})
class WithSimAXIMMIO extends OverrideHarnessBinder({
(system: CanHaveMasterAXI4MMIOPort, th: HasHarnessSignalReferences, ports: Seq[ClockedIO[AXI4Bundle]]) => {
(system: CanHaveMasterAXI4MMIOPort, th: HasHarnessSignalReferences, ports: Seq[ClockedAndResetIO[AXI4Bundle]]) => {
val p: Parameters = chipyard.iobinders.GetSystemParameters(system)
(ports zip system.mmioAXI4Node.edges.in).map { case (port, edge) =>
val mmio_mem = LazyModule(new SimAXIMem(edge, size = p(ExtBus).get.size)(p))
withClockAndReset(port.clock, th.harnessReset) {
withClockAndReset(port.clock, port.reset) {
Module(mmio_mem.module).suggestName("mmio_mem")
}
mmio_mem.io_axi4.head <> port.bits

View File

@@ -108,14 +108,25 @@ class ComposeIOBinder[T, S <: Data](fn: => (T) => (Seq[S], Seq[IOCell]))(implici
})
object BoreHelper {
def apply(name: String, source: Clock): Clock = {
val clock_io = IO(Output(Clock())).suggestName(name)
val clock_wire = Wire(Clock()).suggestName(s"chiptop_${name}")
dontTouch(clock_wire)
clock_wire := false.B.asClock // necessary for BoringUtils to work properly
BoringUtils.bore(source, Seq(clock_wire))
clock_io := clock_wire
clock_io
def apply[T <: Data](name: String, source: T): T = {
val (io, wire) = source match {
case c: Clock =>
val wire = Wire(Clock())
// Provide a dummy assignment to prevent FIRRTL invalid assignment
// errors prior to running the wiring pass
wire := false.B.asClock
(IO(Output(Clock())), wire)
case r: Reset =>
val wire = Wire(Reset())
wire := false.B
(IO(Output(Reset())), wire)
}
io.suggestName(name)
wire.suggestName(s"chiptop_${name}")
dontTouch(wire)
BoringUtils.bore(source, Seq(wire))
io := wire
io.asInstanceOf[source.type]
}
}
@@ -257,10 +268,12 @@ class WithSerialTLIOCells extends OverrideIOBinder({
class WithAXI4MemPunchthrough extends OverrideIOBinder({
(system: CanHaveMasterAXI4MemPort) => {
val ports: Seq[ClockedIO[AXI4Bundle]] = system.mem_axi4.zipWithIndex.map({ case (m, i) =>
val p = IO(new ClockedIO(DataMirror.internal.chiselTypeClone[AXI4Bundle](m))).suggestName(s"axi4_mem_${i}")
val ports: Seq[ClockedAndResetIO[AXI4Bundle]] = system.mem_axi4.zipWithIndex.map({ case (m, i) =>
val p = IO(new ClockedAndResetIO(DataMirror.internal.chiselTypeClone[AXI4Bundle](m))).suggestName(s"axi4_mem_${i}")
p.bits <> m
p.clock := BoreHelper("axi4_mem_clock", system.asInstanceOf[BaseSubsystem].mbus.module.clock)
val mbus = system.asInstanceOf[HasTileLinkLocations].locateTLBusWrapper(MBUS)
p.clock := BoreHelper("axi4_mem_clock", mbus.module.clock)
p.reset := BoreHelper("axi4_mem_reset", mbus.module.reset)
p
})
(ports, Nil)
@@ -269,10 +282,12 @@ class WithAXI4MemPunchthrough extends OverrideIOBinder({
class WithAXI4MMIOPunchthrough extends OverrideIOBinder({
(system: CanHaveMasterAXI4MMIOPort) => {
val ports: Seq[ClockedIO[AXI4Bundle]] = system.mmio_axi4.zipWithIndex.map({ case (m, i) =>
val p = IO(new ClockedIO(DataMirror.internal.chiselTypeClone[AXI4Bundle](m))).suggestName(s"axi4_mmio_${i}")
val ports: Seq[ClockedAndResetIO[AXI4Bundle]] = system.mmio_axi4.zipWithIndex.map({ case (m, i) =>
val p = IO(new ClockedAndResetIO(DataMirror.internal.chiselTypeClone[AXI4Bundle](m))).suggestName(s"axi4_mmio_${i}")
p.bits <> m
p.clock := BoreHelper("axi4_mmio_clock", system.asInstanceOf[BaseSubsystem].mbus.module.clock)
val mbus = system.asInstanceOf[HasTileLinkLocations].locateTLBusWrapper(MBUS)
p.clock := BoreHelper("axi4_mmio_clock", mbus.module.clock)
p.reset := BoreHelper("axi4_mmio_reset", mbus.module.reset)
p
})
(ports, Nil)

View File

@@ -62,6 +62,10 @@ case class DividerOnlyClockGeneratorNode(pllName: String)(implicit valName: ValN
* fast reference clock (roughly LCM(requested frequencies)) which is passed up the
* diplomatic graph, and then generates dividers for each unique requested
* frequency.
*
* Output resets are not synchronized to generated clocks and should be
* synchronized by the user in a manner they see fit.
*
*/
class DividerOnlyClockGenerator(pllName: String)(implicit p: Parameters, valName: ValName) extends LazyModule {
@@ -87,6 +91,7 @@ class DividerOnlyClockGenerator(pllName: String)(implicit p: Parameters, valName
for (((sinkBName, sinkB), sinkP) <- outClocks.member.elements.zip(outSinkParams.members)) {
val div = pllConfig.sinkDividerMap(sinkP)
sinkB.clock := dividedClocks.getOrElse(div, instantiateDivider(div))
// Reset handling and synchronization is expected to be handled by a downstream node
sinkB.reset := refClock.reset
}
}

View File

@@ -0,0 +1,30 @@
package chipyard.clocking
import chisel3._
import freechips.rocketchip.config.{Parameters}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.prci._
import freechips.rocketchip.util.{ResetCatchAndSync}
/**
* Instantiates a reset synchronizer on all clock-reset pairs in a clock group
*/
class ClockGroupResetSynchronizer(implicit p: Parameters) extends LazyModule {
val node = ClockGroupIdentityNode()
lazy val module = new LazyRawModuleImp(this) {
(node.out zip node.in).map { case ((oG, _), (iG, _)) =>
(oG.member.data zip iG.member.data).foreach { case (o, i) =>
o.clock := i.clock
o.reset := ResetCatchAndSync(i.clock, i.reset.asBool)
}
}
}
}
object ClockGroupResetSynchronizer {
def apply()(implicit p: Parameters, valName: ValName) = LazyModule(new ClockGroupResetSynchronizer()).node
}

View File

@@ -49,6 +49,6 @@ class AbstractConfig extends Config(
new freechips.rocketchip.subsystem.WithNoSlavePort ++ // no top-level MMIO slave port (overrides default set in rocketchip)
new freechips.rocketchip.subsystem.WithInclusiveCache ++ // use Sifive L2 cache
new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++ // no external interrupts
new freechips.rocketchip.subsystem.WithCoherentBusTopology ++ // hierarchical buses including mbus+l2
new chipyard.WithMulticlockCoherentBusTopology ++ // hierarchical buses including mbus+l2
new freechips.rocketchip.system.BaseConfig) // "base" rocketchip system

View File

@@ -179,6 +179,9 @@ class DividedClockRocketConfig extends Config(
new chipyard.config.WithTileFrequency(200.0) ++
new freechips.rocketchip.subsystem.WithRationalRocketTiles ++ // Add rational crossings between RocketTile and uncore
new freechips.rocketchip.subsystem.WithNBigCores(1) ++
new chipyard.config.WithMemoryBusFrequency(50.0) ++
new chipyard.config.WithAsynchrousMemoryBusCrossing ++
new testchipip.WithAsynchronousSerialSlaveCrossing ++
new chipyard.config.AbstractConfig)
class LBWIFRocketConfig extends Config(

View File

@@ -18,7 +18,7 @@ import icenet.{CanHavePeripheryIceNIC, SimNetwork, NicLoopback, NICKey, NICIOvon
import junctions.{NastiKey, NastiParameters}
import midas.models.{FASEDBridge, AXI4EdgeSummary, CompleteConfig}
import midas.targetutils.{FAMEModelAnnotation, MemModelAnnotation, EnableModelMultiThreadingAnnotation}
import midas.targetutils.{MemModelAnnotation, EnableModelMultiThreadingAnnotation}
import firesim.bridges._
import firesim.configs.MemModelKey
import tracegen.{TraceGenSystemModuleImp}
@@ -98,14 +98,14 @@ class WithBlockDeviceBridge extends OverrideHarnessBinder({
})
class WithFASEDBridge extends OverrideHarnessBinder({
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedIO[AXI4Bundle]]) => {
(system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedAndResetIO[AXI4Bundle]]) => {
implicit val p: Parameters = GetSystemParameters(system)
(ports zip system.memAXI4Node.edges.in).map { case (axi4, edge) =>
val nastiKey = NastiParameters(axi4.bits.r.bits.data.getWidth,
axi4.bits.ar.bits.addr.getWidth,
axi4.bits.ar.bits.id.getWidth)
system match {
case s: BaseSubsystem => FASEDBridge(axi4.clock, axi4.bits, th.harnessReset.asBool,
case s: BaseSubsystem => FASEDBridge(axi4.clock, axi4.bits, axi4.reset.asBool,
CompleteConfig(p(firesim.configs.MemModelKey),
nastiKey,
Some(AXI4EdgeSummary(edge)),
@@ -161,10 +161,8 @@ class WithFireSimFAME5 extends ComposeIOBinder({
(system: HasTilesModuleImp) => {
system.outer.tiles.map {
case b: BoomTile =>
annotate(FAMEModelAnnotation(b.module))
annotate(EnableModelMultiThreadingAnnotation(b.module))
case r: RocketTile =>
annotate(FAMEModelAnnotation(r.module))
annotate(EnableModelMultiThreadingAnnotation(r.module))
}
(Nil, Nil)

View File

@@ -16,7 +16,7 @@ import midas.widgets.{Bridge, PeekPokeBridge, RationalClockBridge, RationalClock
import chipyard._
import chipyard.harness._
import chipyard.iobinders._
import chipyard.clocking.{FrequencyUtils, ClockGroupNamePrefixer, ClockGroupFrequencySpecifier, SimplePllConfiguration}
import chipyard.clocking._
// Determines the number of times to instantiate the DUT in the harness.
// Subsumes legacy supernode support
@@ -96,11 +96,12 @@ class WithFireSimSimpleClocks extends Config((site, here, up) => {
val aggregator = LazyModule(new ClockGroupAggregator("allClocks")).node
(chiptop.implicitClockSinkNode := ClockGroup() := aggregator)
(systemAsyncClockGroup := ClockGroupNamePrefixer() := aggregator)
(systemAsyncClockGroup :*= ClockGroupNamePrefixer() :*= aggregator)
val inputClockSource = ClockGroupSourceNode(Seq(ClockGroupSourceParameters()))
(aggregator
:= ClockGroupResetSynchronizer()
:= ClockGroupFrequencySpecifier(p(ClockFrequencyAssignersKey), p(DefaultClockFrequencyKey))
:= inputClockSource)
@@ -113,15 +114,7 @@ class WithFireSimSimpleClocks extends Config((site, here, up) => {
(clockGroupBundle.member.data zip input_clocks.data).foreach { case (clockBundle, inputClock) =>
clockBundle.clock := inputClock
}
// Assign resets. The synchronization scheme is still WIP.
for ((name, clockBundle) <- clockGroupBundle.member.elements) {
if (name.contains("core")) {
clockBundle.reset := ResetCatchAndSync(clockBundle.clock, reset.asBool)
} else {
clockBundle.reset := reset
}
clockBundle.reset := reset
}
val pllConfig = new SimplePllConfiguration("FireSim RationalClockBridge", clockGroupEdge.sink.members)

View File

@@ -37,10 +37,6 @@ class WithBootROM extends Config((site, here, up) => {
}
})
class WithPeripheryBusFrequency(freq: BigInt) extends Config((site, here, up) => {
case PeripheryBusKey => up(PeripheryBusKey).copy(dtsFrequency = Some(freq))
})
// Disables clock-gating; doesn't play nice with our FAME-1 pass
class WithoutClockGating extends Config((site, here, up) => {
case DebugModuleKey => up(DebugModuleKey, site).map(_.copy(clockGate = false))
@@ -74,7 +70,15 @@ class WithFireSimConfigTweaks extends Config(
new WithBootROM ++
// Optional*: Removing this will require adjusting the UART baud rate and
// potential target-software changes to properly capture UART output
new WithPeripheryBusFrequency(BigInt(3200000000L)) ++
new chipyard.config.WithPeripheryBusFrequency(3200.0) ++
// Optional: These three configs put the DRAM memory system in it's own clock domian.
// Removing the first config will result in the FASED timing model running
// at the pbus freq (above, 3.2 GHz), which is outside the range of valid DDR3 speedgrades.
// 1 GHz matches the FASED default, using some other frequency will require
// runnings the FASED runtime configuration generator to generate faithful DDR3 timing values.
new chipyard.config.WithMemoryBusFrequency(1000.0) ++
new chipyard.config.WithAsynchrousMemoryBusCrossing ++
new testchipip.WithAsynchronousSerialSlaveCrossing ++
// Required: Existing FAME-1 transform cannot handle black-box clock gates
new WithoutClockGating ++
// Required*: Removes thousands of assertions that would be synthesized (* pending PriorityMux bugfix)
@@ -129,7 +133,7 @@ class FireSimSmallSystemConfig extends Config(
new WithDefaultFireSimBridges ++
new WithDefaultMemModel ++
new WithBootROM ++
new WithPeripheryBusFrequency(BigInt(3200000000L)) ++
new chipyard.config.WithPeripheryBusFrequency(3200.0) ++
new WithoutClockGating ++
new WithoutTLMonitors ++
new freechips.rocketchip.subsystem.WithExtMemSize(1 << 28) ++