From e7eda21a1c26ed0e64a34fcd3f6dd300efa6f16c Mon Sep 17 00:00:00 2001 From: "joonho.whangbo" Date: Mon, 22 Jan 2024 13:41:41 -0800 Subject: [PATCH 1/3] Make no tlmonitor the default config, add explicit example on how to add tlmonitors --- docs/Simulation/Software-RTL-Simulation.rst | 26 +++++++++++++++++++ .../main/scala/config/AbstractConfig.scala | 1 + .../src/main/scala/config/RocketConfigs.scala | 4 +++ .../config/fragments/SubsystemFragments.scala | 6 ++++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/docs/Simulation/Software-RTL-Simulation.rst b/docs/Simulation/Software-RTL-Simulation.rst index 8c87cc80..adeeea1f 100644 --- a/docs/Simulation/Software-RTL-Simulation.rst +++ b/docs/Simulation/Software-RTL-Simulation.rst @@ -223,3 +223,29 @@ The ``VERILATOR_THREADS=`` option enables the compiled Verilator simulator On a multi-socket machine, you will want to make sure all threads are on the same socket by using ``NUMACTL=1`` to enable ``numactl``. By enabling this, you will use Chipyard's ``numa_prefix`` wrapper, which is a simple wrapper around ``numactl`` that runs your verilated simulator like this: ``$(numa_prefix) ./simulator- ``. Note that both these flags are mutually exclusive, you can use either independently (though it makes sense to use ``NUMACTL`` just with ``VERILATOR_THREADS=8`` during a Verilator simulation). + + +When the SoC Hangs Due to Bad Tilelink Messages +----------------------------------------------- + +There are many cases when your custom module interfaces with Tilelink (e.g., when you write a custom accelerator). +Wrong interfaces with Tilelink can cause the SoC to hang and can be tricky to debug. +To help deal with these situations, you can add hardware modules called Tilelink monitors into +your SoC that will fire assertions when wrong Tilelink messages are sent. + +You can simply add these modules into your SoC by adding the below line into your config. + +.. 
code-block:: scala + + new chipyard.config.WithTLMonitors ++ + + +For instance: + +.. code-block:: scala + + class TLMonitorRocketConfig extends Config( + new chipyard.config.WithTLMonitors ++ + new chipyard.RocketConfig) + +One caveat is that adding these modules can slow down your RTL simulation by around 2x. diff --git a/generators/chipyard/src/main/scala/config/AbstractConfig.scala b/generators/chipyard/src/main/scala/config/AbstractConfig.scala index 8a328daf..f5f55f53 100644 --- a/generators/chipyard/src/main/scala/config/AbstractConfig.scala +++ b/generators/chipyard/src/main/scala/config/AbstractConfig.scala @@ -81,6 +81,7 @@ class AbstractConfig extends Config( new chipyard.config.WithL2TLBs(1024) ++ // use L2 TLBs new chipyard.config.WithNoSubsystemClockIO ++ // drive the subsystem diplomatic clocks from ChipTop instead of using implicit clocks new chipyard.config.WithInheritBusFrequencyAssignments ++ // Unspecified clocks within a bus will receive the bus frequency if set + new freechips.rocketchip.subsystem.WithoutTLMonitors ++ // Don't add TL Monitors in the default configuration new freechips.rocketchip.subsystem.WithNMemoryChannels(1) ++ // Default 1 memory channels new freechips.rocketchip.subsystem.WithClockGateModel ++ // add default EICG_wrapper clock gate model new freechips.rocketchip.subsystem.WithJtagDTM ++ // set the debug module to expose a JTAG port diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index 0ddb3737..c8e8679e 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -96,3 +96,7 @@ class ClusteredRocketConfig extends Config( new freechips.rocketchip.subsystem.WithCluster(1) ++ new freechips.rocketchip.subsystem.WithCluster(0) ++ new chipyard.config.AbstractConfig) + +class TLMonitorRocketConfig extends Config( + new chipyard.config.WithTLMonitors ++ + 
new chipyard.RocketConfig) diff --git a/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala b/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala index b4971cba..0a35d192 100644 --- a/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala +++ b/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala @@ -2,7 +2,7 @@ package chipyard.config import org.chipsalliance.cde.config.{Config} import freechips.rocketchip.subsystem._ -import freechips.rocketchip.diplomacy.{DTSTimebase} +import freechips.rocketchip.diplomacy.{DTSTimebase, MonitorsEnabled} import sifive.blocks.inclusivecache.{InclusiveCachePortParameters} // Replaces the L2 with a broadcast manager for maintaining coherence @@ -31,3 +31,7 @@ class WithInclusiveCacheInteriorBuffer(buffer: InclusiveCachePortParameters = In class WithInclusiveCacheExteriorBuffer(buffer: InclusiveCachePortParameters = InclusiveCachePortParameters.full) extends Config((site, here, up) => { case InclusiveCacheKey => up(InclusiveCacheKey).copy(bufInnerExterior=buffer, bufOuterExterior=buffer) }) + +class WithTLMonitors extends Config((site, here, up) => { + case MonitorsEnabled => true +}) From da8a1b50d01f3c649a4021a3ad92ff0ef093ab52 Mon Sep 17 00:00:00 2001 From: "joonho.whangbo" Date: Mon, 22 Jan 2024 13:58:46 -0800 Subject: [PATCH 2/3] Fast rocket config --- docs/Simulation/Software-RTL-Simulation.rst | 13 ++++++------- .../src/main/scala/config/AbstractConfig.scala | 1 - .../src/main/scala/config/RocketConfigs.scala | 4 ++-- .../scala/config/fragments/SubsystemFragments.scala | 6 +----- 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/Simulation/Software-RTL-Simulation.rst b/docs/Simulation/Software-RTL-Simulation.rst index adeeea1f..fc97d959 100644 --- a/docs/Simulation/Software-RTL-Simulation.rst +++ b/docs/Simulation/Software-RTL-Simulation.rst @@ -225,27 +225,26 @@ By enabling this, you will use Chipyard's 
``numa_prefix`` wrapper, which is a si Note that both these flags are mutually exclusive, you can use either independently (though it makes sense to use ``NUMACTL`` just with ``VERILATOR_THREADS=8`` during a Verilator simulation). -When the SoC Hangs Due to Bad Tilelink Messages +Speeding up your RTL Simulation by 2x! ----------------------------------------------- There are many cases when your custom module interfaces with Tilelink (e.g., when you write a custom accelerator). Wrong interfaces with Tilelink can cause the SoC to hang and can be tricky to debug. To help deal with these situations, you can add hardware modules called Tilelink monitors into your SoC that will fire assertions when wrong Tilelink messages are sent. +However, these modules can significantly slow down the speed of your RTL simulation. -You can simply add these modules into your SoC by adding the below line into your config. +You can simply remove these modules by adding the below line into your config. .. code-block:: scala - new chipyard.config.WithTLMonitors ++ + new freechips.rocketchip.subsystem.WithoutTLMonitors ++ For instance: .. code-block:: scala - class TLMonitorRocketConfig extends Config( - new chipyard.config.WithTLMonitors ++ + class FastRTLSimRocketConfig extends Config( + new freechips.rocketchip.subsystem.WithoutTLMonitors ++ new chipyard.RocketConfig) - -One caveat is that adding these modules can slow down your RTL simulation by around 2x. 
diff --git a/generators/chipyard/src/main/scala/config/AbstractConfig.scala b/generators/chipyard/src/main/scala/config/AbstractConfig.scala index f5f55f53..8a328daf 100644 --- a/generators/chipyard/src/main/scala/config/AbstractConfig.scala +++ b/generators/chipyard/src/main/scala/config/AbstractConfig.scala @@ -81,7 +81,6 @@ class AbstractConfig extends Config( new chipyard.config.WithL2TLBs(1024) ++ // use L2 TLBs new chipyard.config.WithNoSubsystemClockIO ++ // drive the subsystem diplomatic clocks from ChipTop instead of using implicit clocks new chipyard.config.WithInheritBusFrequencyAssignments ++ // Unspecified clocks within a bus will receive the bus frequency if set - new freechips.rocketchip.subsystem.WithoutTLMonitors ++ // Don't add TL Monitors in the default configuration new freechips.rocketchip.subsystem.WithNMemoryChannels(1) ++ // Default 1 memory channels new freechips.rocketchip.subsystem.WithClockGateModel ++ // add default EICG_wrapper clock gate model new freechips.rocketchip.subsystem.WithJtagDTM ++ // set the debug module to expose a JTAG port diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index c8e8679e..d3f21584 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -97,6 +97,6 @@ class ClusteredRocketConfig extends Config( new freechips.rocketchip.subsystem.WithCluster(0) ++ new chipyard.config.AbstractConfig) -class TLMonitorRocketConfig extends Config( - new chipyard.config.WithTLMonitors ++ +class FastRTLSimRocketConfig extends Config( + new freechips.rocketchip.subsystem.WithoutTLMonitors ++ new chipyard.RocketConfig) diff --git a/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala b/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala index 0a35d192..b4971cba 100644 --- 
a/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala +++ b/generators/chipyard/src/main/scala/config/fragments/SubsystemFragments.scala @@ -2,7 +2,7 @@ package chipyard.config import org.chipsalliance.cde.config.{Config} import freechips.rocketchip.subsystem._ -import freechips.rocketchip.diplomacy.{DTSTimebase, MonitorsEnabled} +import freechips.rocketchip.diplomacy.{DTSTimebase} import sifive.blocks.inclusivecache.{InclusiveCachePortParameters} // Replaces the L2 with a broadcast manager for maintaining coherence @@ -31,7 +31,3 @@ class WithInclusiveCacheInteriorBuffer(buffer: InclusiveCachePortParameters = In class WithInclusiveCacheExteriorBuffer(buffer: InclusiveCachePortParameters = InclusiveCachePortParameters.full) extends Config((site, here, up) => { case InclusiveCacheKey => up(InclusiveCacheKey).copy(bufInnerExterior=buffer, bufOuterExterior=buffer) }) - -class WithTLMonitors extends Config((site, here, up) => { - case MonitorsEnabled => true -}) From b848e23d0871dbeaedaf74fa5096555513361aba Mon Sep 17 00:00:00 2001 From: "joonho.whangbo" Date: Mon, 22 Jan 2024 14:14:59 -0800 Subject: [PATCH 3/3] Update TLMonitor documentation --- docs/Simulation/Software-RTL-Simulation.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Simulation/Software-RTL-Simulation.rst b/docs/Simulation/Software-RTL-Simulation.rst index fc97d959..c84673b9 100644 --- a/docs/Simulation/Software-RTL-Simulation.rst +++ b/docs/Simulation/Software-RTL-Simulation.rst @@ -234,7 +234,8 @@ To help deal with these situations, you can add hardware modules called Tilelink your SoC that will fire assertions when wrong Tilelink messages are sent. However, these modules can significantly slow down the speed of your RTL simulation. -You can simply remove these modules by adding the below line into your config. 
+These modules are added to the SoC by default, so you have to manually +remove them by adding the line below to your config. .. code-block:: scala