From 8a60b3612541450124d1ba80890de20dda9b992c Mon Sep 17 00:00:00 2001 From: Nikhil Jha Date: Fri, 26 May 2023 14:02:19 -0700 Subject: [PATCH 1/4] doc: add higher level explanations of RoCC + more resources --- docs/Customization/RoCC-Accelerators.rst | 53 ++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/Customization/RoCC-Accelerators.rst b/docs/Customization/RoCC-Accelerators.rst index 79d9e898..a2d98589 100644 --- a/docs/Customization/RoCC-Accelerators.rst +++ b/docs/Customization/RoCC-Accelerators.rst @@ -1,10 +1,15 @@ .. _rocc-accelerators: Adding a RoCC Accelerator ----------------------------- +------------------------- -RoCC accelerators are lazy modules that extend the ``LazyRoCC`` class. -Their implementation should extends the ``LazyRoCCModule`` class. +A RoCC accelerator is a component that can be added into a particular Rocket or BOOM tile. +It receives instructions that match a certain opcode, talks to other parts of the core or SoC (L1, L2, PTW, FPU), and then optionally writes back a value into the register corresponding to the ``rd`` field of the instruction. +RoCC accelerators are instantiated via modules that extend the ``LazyRoCC`` class. +These modules lazily instantiate another module which extends the ``LazyRoCCModuleImp`` class. +This extra layer of indirection is used so that Diplomacy can figure out how to connect the RoCC module to the chip, without needing to instantiate the module ahead of time. +Lazy modules are further explained in the :ref:`Chipyard-Basics/Configs-Parameters-Mixins:Cake Pattern / Mixin` section. +Below is a minimal instantiation of a RoCC accelerator. .. code-block:: scala @@ -31,7 +36,6 @@ Their implementation should extends the ``LazyRoCCModule`` class. ... } - The ``opcodes`` parameter for ``LazyRoCC`` is the set of custom opcodes that will map to this accelerator. More on this in the next subsection. 
@@ -46,6 +50,46 @@ the ``busy`` signal, which indicates when the accelerator is still handling an i and the ``interrupt`` signal, which can be used to interrupt the CPU. Look at the examples in ``generators/rocket-chip/src/main/scala/tile/LazyRoCC.scala`` for detailed information on the different IOs. +There is also more information about each of the signals in `the RoCC Documentation written by UCSD `_, although it is updated out of tree and may be out of date. + + +Accessing Memory via L1 Cache +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A RoCC accelerator can access memory through the L1 Cache of the core it is attached to. +This is a simpler interface for accelerator architects to implement, but will generally have lower achievable throughput than a dedicated TileLink port. + +In your ``LazyRoCCModuleImp``, the signal ``io.mem`` is a ``HellaCacheIO``, which is defined in ``generators/rocket-chip/src/main/scala/rocket/HellaCache.scala``. + +.. code-block:: scala + + class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { + val req = Decoupled(new HellaCacheReq) + val s1_kill = Output(Bool()) // kill previous cycle's req + val s1_data = Output(new HellaCacheWriteData()) // data for previous cycle's req + val s2_nack = Input(Bool()) // req from two cycles ago is rejected + val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) + val s2_kill = Output(Bool()) // kill req from two cycles ago + val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO + val s2_paddr = Input(UInt(paddrBits.W)) // translated address + + val resp = Flipped(Valid(new HellaCacheResp)) + val replay_next = Input(Bool()) + val s2_xcpt = Input(new HellaCacheExceptions) + val s2_gpa = Input(UInt(vaddrBitsExtended.W)) + val s2_gpa_is_pte = Input(Bool()) + val uncached_resp = tileParams.dcache.get.separateUncachedResp.option(Flipped(Decoupled(new HellaCacheResp))) + val ordered = Input(Bool()) + val perf = Input(new 
HellaCachePerfEvents()) + + val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? + val clock_enabled = Input(Bool()) // is D$ currently being clocked? + } + +At a high level, you must tag requests that you send across this interface using the ``io.mem.req.tag``, and the tag will be returned to you when the data is ready. +Responses may come back out of order if you issue multiple requests, so you can use these tags to tell what data came back. +Note that the top two bits of the tag are reserved, and MUST be set to zero, or the interface will exhibit undefined behavior. + Adding RoCC accelerator to Config ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,3 +111,4 @@ For instance, if we wanted to add the previously defined accelerator and route c new RocketConfig) To add RoCC instructions in your program, use the RoCC C macros provided in ``tests/rocc.h``. You can find examples in the files ``tests/accum.c`` and ``charcount.c``. + From 471f8879d799b9528ca0e02c4ba0c5861b5ea925 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Sat, 8 Apr 2023 16:38:36 -0700 Subject: [PATCH 2/4] Support banked/partitioned scratchpads --- generators/chipyard/src/main/scala/DigitalTop.scala | 2 +- .../src/main/scala/config/RocketConfigs.scala | 12 +++++++++--- generators/testchipip | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/generators/chipyard/src/main/scala/DigitalTop.scala b/generators/chipyard/src/main/scala/DigitalTop.scala index 6711eac7..561af0d7 100644 --- a/generators/chipyard/src/main/scala/DigitalTop.scala +++ b/generators/chipyard/src/main/scala/DigitalTop.scala @@ -16,7 +16,7 @@ class DigitalTop(implicit p: Parameters) extends ChipyardSystem with testchipip.CanHavePeripheryCustomBootPin // Enables optional custom boot pin with testchipip.CanHavePeripheryBootAddrReg // Use programmable boot address register with testchipip.CanHaveTraceIO // Enables optionally adding trace IO - with testchipip.CanHaveBackingScratchpad // Enables 
optionally adding a backing scratchpad + with testchipip.CanHaveBankedScratchpad // Enables optionally adding a banked scratchpad with testchipip.CanHavePeripheryBlockDevice // Enables optionally adding the block device with testchipip.CanHavePeripheryTLSerial // Enables optionally adding the backing memory and serial adapter with sifive.blocks.devices.i2c.HasPeripheryI2C // Enables optionally adding the sifive I2C diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index 21b630cc..a68ba55d 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -74,13 +74,19 @@ class L1ScratchpadRocketConfig extends Config( new chipyard.config.AbstractConfig) // DOC include start: mbusscratchpadrocket -class MbusScratchpadRocketConfig extends Config( - new testchipip.WithBackingScratchpad ++ // add mbus backing scratchpad - new freechips.rocketchip.subsystem.WithNoMemPort ++ // remove offchip mem port +class MbusScratchpadOnlyRocketConfig extends Config( + new testchipip.WithMbusScratchpad(stripes=2, partitions=2) ++ // add 4 banks mbus backing scratchpad + new freechips.rocketchip.subsystem.WithNoMemPort ++ // remove offchip mem port new freechips.rocketchip.subsystem.WithNBigCores(1) ++ new chipyard.config.AbstractConfig) // DOC include end: mbusscratchpadrocket +class SbusScratchpadRocketConfig extends Config( + new testchipip.WithSbusScratchpad(base=0x70000000L, stripes=2, partitions=2) ++ // add 4 lanes sbus backing scratchpad + new freechips.rocketchip.subsystem.WithNBigCores(1) ++ + new chipyard.config.AbstractConfig) + + class MulticlockRocketConfig extends Config( new freechips.rocketchip.subsystem.WithAsynchronousRocketTiles(3, 3) ++ // Add async crossings between RocketTile and uncore new freechips.rocketchip.subsystem.WithNBigCores(1) ++ diff --git a/generators/testchipip b/generators/testchipip 
index a3e9c1ff..8a1540ce 160000 --- a/generators/testchipip +++ b/generators/testchipip @@ -1 +1 @@ -Subproject commit a3e9c1ffeae8af573831e4ac5fd00a76df0ca7f1 +Subproject commit 8a1540ce90405433ac377de5a6d331d34ff108df From 1e3d4aad460051c4fc030d10307097ea84766d33 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 10 Apr 2023 15:31:29 -0700 Subject: [PATCH 3/4] Update WithBackingScratchpad for firechip --- .../chipyard/src/main/scala/config/RocketConfigs.scala | 4 ++-- generators/firechip/src/main/scala/TargetConfigs.scala | 6 +++--- generators/testchipip | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index a68ba55d..a0fab881 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -75,14 +75,14 @@ class L1ScratchpadRocketConfig extends Config( // DOC include start: mbusscratchpadrocket class MbusScratchpadOnlyRocketConfig extends Config( - new testchipip.WithMbusScratchpad(stripes=2, partitions=2) ++ // add 4 banks mbus backing scratchpad + new testchipip.WithMbusScratchpad(banks=2, partitions=2) ++ // add 2 partitions of 2 banks mbus backing scratchpad new freechips.rocketchip.subsystem.WithNoMemPort ++ // remove offchip mem port new freechips.rocketchip.subsystem.WithNBigCores(1) ++ new chipyard.config.AbstractConfig) // DOC include end: mbusscratchpadrocket class SbusScratchpadRocketConfig extends Config( - new testchipip.WithSbusScratchpad(base=0x70000000L, stripes=2, partitions=2) ++ // add 4 lanes sbus backing scratchpad + new testchipip.WithSbusScratchpad(base=0x70000000L, banks=4) ++ // add 4 banks sbus backing scratchpad new freechips.rocketchip.subsystem.WithNBigCores(1) ++ new chipyard.config.AbstractConfig) diff --git a/generators/firechip/src/main/scala/TargetConfigs.scala 
b/generators/firechip/src/main/scala/TargetConfigs.scala index c27abd50..1c821294 100644 --- a/generators/firechip/src/main/scala/TargetConfigs.scala +++ b/generators/firechip/src/main/scala/TargetConfigs.scala @@ -138,7 +138,7 @@ class WithFireSimConfigTweaks extends Config( class WithMinimalFireSimHighPerfConfigTweaks extends Config( new WithFireSimHighPerfClocking ++ new freechips.rocketchip.subsystem.WithNoMemPort ++ - new testchipip.WithBackingScratchpad ++ + new testchipip.WithMbusScratchpad ++ new WithMinimalFireSimDesignTweaks ) @@ -148,7 +148,7 @@ class WithMinimalFireSimHighPerfConfigTweaks extends Config( class WithMinimalAndBlockDeviceFireSimHighPerfConfigTweaks extends Config( new WithFireSimHighPerfClocking ++ new freechips.rocketchip.subsystem.WithNoMemPort ++ // removes mem port for FASEDBridge to match against - new testchipip.WithBackingScratchpad ++ // adds backing scratchpad for memory to replace FASED model + new testchipip.WithMbusScratchpad ++ // adds backing scratchpad for memory to replace FASED model new testchipip.WithBlockDevice(true) ++ // add in block device new WithMinimalFireSimDesignTweaks ) @@ -329,7 +329,7 @@ class FireSim16LargeBoomConfig extends Config( class FireSimNoMemPortConfig extends Config( new WithDefaultFireSimBridges ++ new freechips.rocketchip.subsystem.WithNoMemPort ++ - new testchipip.WithBackingScratchpad ++ + new testchipip.WithMbusScratchpad ++ new WithFireSimConfigTweaks ++ new chipyard.RocketConfig) diff --git a/generators/testchipip b/generators/testchipip index 8a1540ce..35d7e196 160000 --- a/generators/testchipip +++ b/generators/testchipip @@ -1 +1 @@ -Subproject commit 8a1540ce90405433ac377de5a6d331d34ff108df +Subproject commit 35d7e1969d1d3e54d29a10901737d9b2ba2ab5a3 From e4eaa5035458fbaf08de037d9382945b7c1f3aba Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 13 Jun 2023 00:57:56 -0700 Subject: [PATCH 4/4] docs: Fix comment on rocc tag bits --- docs/Customization/RoCC-Accelerators.rst | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Customization/RoCC-Accelerators.rst b/docs/Customization/RoCC-Accelerators.rst index a2d98589..ad3bfa54 100644 --- a/docs/Customization/RoCC-Accelerators.rst +++ b/docs/Customization/RoCC-Accelerators.rst @@ -88,7 +88,8 @@ In your ``LazyRoCCModuleImp``, the signal ``io.mem`` is a ``HellaCacheIO``, whic At a high level, you must tag requests that you send across this interface using the ``io.mem.req.tag``, and the tag will be returned to you when the data is ready. Responses may come back out of order if you issue multiple requests, so you can use these tags to tell what data came back. -Note that the top two bits of the tag are reserved, and MUST be set to zero, or the interface will exhibit undefined behavior. +Note that the number of tag bits is controlled by ``dcacheReqTagBits``, which is usually set to 6. +Using more tag bits than ``dcacheReqTagBits`` allows will cause errors or hangs. Adding RoCC accelerator to Config