diff --git a/build.sbt b/build.sbt index 8083dc3d..05a80bba 100644 --- a/build.sbt +++ b/build.sbt @@ -147,7 +147,7 @@ lazy val sha3 = (project in file("generators/sha3")) .settings(commonSettings) lazy val tapeout = conditionalDependsOn(project in file("./tools/barstools/tapeout/")) - .dependsOn(chisel_testers) + .dependsOn(chisel_testers, example) .settings(commonSettings) lazy val mdf = (project in file("./tools/barstools/mdf/scalalib/")) diff --git a/common.mk b/common.mk index 442c62f9..112285c2 100644 --- a/common.mk +++ b/common.mk @@ -53,10 +53,12 @@ HARNESS_CONF_FLAGS = -thconf $(HARNESS_SMEMS_CONF) TOP_TARGETS = $(TOP_FILE) $(TOP_SMEMS_CONF) $(TOP_ANNO) $(TOP_FIR) $(sim_top_blackboxes) HARNESS_TARGETS = $(HARNESS_FILE) $(HARNESS_SMEMS_CONF) $(HARNESS_ANNO) $(HARNESS_FIR) $(sim_harness_blackboxes) +# DOC include start: FirrtlCompiler .INTERMEDIATE: firrtl_temp $(TOP_TARGETS) $(HARNESS_TARGETS): firrtl_temp firrtl_temp: $(FIRRTL_FILE) $(ANNO_FILE) cd $(base_dir) && $(SBT) "project tapeout" "runMain barstools.tapeout.transforms.GenerateTopAndHarness -o $(TOP_FILE) -tho $(HARNESS_FILE) -i $(FIRRTL_FILE) --syn-top $(TOP) --harness-top $(VLOG_MODEL) -faf $(ANNO_FILE) -tsaof $(TOP_ANNO) -tdf $(sim_top_blackboxes) -tsf $(TOP_FIR) -thaof $(HARNESS_ANNO) -hdf $(sim_harness_blackboxes) -thf $(HARNESS_FIR) $(REPL_SEQ_MEM) $(HARNESS_CONF_FLAGS) -td $(build_dir)" +# DOC include end: FirrtlCompiler # This file is for simulation only. 
VLSI flows should replace this file with one containing hard SRAMs MACROCOMPILER_MODE ?= --mode synflops diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..e35d8850 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_build diff --git a/docs/Chipyard-Basics/Chipyard-Components.rst b/docs/Chipyard-Basics/Chipyard-Components.rst index 250beb85..6d54ad94 100644 --- a/docs/Chipyard-Basics/Chipyard-Components.rst +++ b/docs/Chipyard-Basics/Chipyard-Components.rst @@ -8,12 +8,13 @@ Generators The Chipyard Framework currently consists of the following RTL generators: + Processor Cores ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -**Rocket** +**Rocket Core** An in-order RISC-V core. - See :ref:`Rocket` for more information. + See :ref:`Rocket Core` for more information. **BOOM (Berkeley Out-of-Order Machine)** An out-of-order RISC-V core. diff --git a/docs/Chipyard-Basics/Configs-Parameters-Mixins.rst b/docs/Chipyard-Basics/Configs-Parameters-Mixins.rst index 2d93f2b3..e83bc9e2 100644 --- a/docs/Chipyard-Basics/Configs-Parameters-Mixins.rst +++ b/docs/Chipyard-Basics/Configs-Parameters-Mixins.rst @@ -80,17 +80,56 @@ This example shows a Rocket Chip based SoC that merges multiple system component .. code-block:: scala class MySoC(implicit p: Parameters) extends RocketSubsystem - with CanHaveMisalignedMasterAXI4MemPort + with CanHaveMasterAXI4MemPort with HasPeripheryBootROM with HasNoDebug with HasPeripherySerial with HasPeripheryUART with HasPeripheryIceNIC { - //Additional top-level specific instantiations or wiring + lazy val module = new MySoCModuleImp(this) } -Mix-in + class MySoCModuleImp(outer: MySoC) extends RocketSubsystemModuleImp(outer) + with CanHaveMasterAXI4MemPortModuleImp + with HasPeripheryBootROMModuleImp + with HasNoDebugModuleImp + with HasPeripherySerialModuleImp + with HasPeripheryUARTModuleImp + with HasPeripheryIceNICModuleImp + +There are two "cakes" here. One for the lazy module (ex. 
``HasPeripherySerial``) and one for the lazy module +implementation (ex. ``HasPeripherySerialModuleImp`` where ``Imp`` refers to implementation). The lazy module defines +all the logical connections between generators and exchanges configuration information among them, while the +lazy module implementation performs the actual Chisel RTL elaboration. + +In the MySoC example class, the "outer" ``MySoC`` instantiates the "inner" +``MySoCModuleImp`` as a lazy module implementation. This delays immediate elaboration +of the module until all logical connections are determined and all configuration information is exchanged. +The ``RocketSubsystem`` outer base class, as well as the +``HasPeripheryX`` outer traits contain code to perform high-level logical +connections. For example, the ``HasPeripherySerial`` outer trait contains code +to lazily instantiate the ``SerialAdapter``, and connect the ``SerialAdapter``'s +TileLink node to the Front bus. + +The ``ModuleImp`` classes and traits perform elaboration of real RTL. +For example, the ``HasPeripherySerialModuleImp`` trait physically connects +the ``SerialAdapter`` module, and instantiates queues. + +In the test harness, the SoC is elaborated with +``val dut = Module(LazyModule(MySoC))``. +After elaboration, the result will be a MySoC module, which contains a +SerialAdapter module (among others). + +From a high level, classes which extend LazyModule *must* reference +their module implementation through ``lazy val module``, and they +*may* optionally reference other lazy modules (which will elaborate +as child modules in the module hierarchy). The "inner" modules +contain the implementation for the module, and may instantiate +other normal modules OR lazy modules (for nested Diplomacy +graphs, for example). + + Mix-in --------------------------- A mix-in is a Scala trait, which sets parameters for specific system components, as well as enabling instantiation and wiring of the relevant system components to system buses. 
diff --git a/docs/Chipyard-Basics/Initial-Repo-Setup.rst b/docs/Chipyard-Basics/Initial-Repo-Setup.rst index 7b7042ee..499939f4 100644 --- a/docs/Chipyard-Basics/Initial-Repo-Setup.rst +++ b/docs/Chipyard-Basics/Initial-Repo-Setup.rst @@ -1,6 +1,13 @@ Initial Repository Setup ======================================================== +Requirements +------------------------------------------- + +Chipyard is developed and tested on Linux-based systems. +It is possible to use this on macOS or other BSD-based systems, although GNU tools will need to be installed; it is also recommended to install the RISC-V toolchain from ``brew``. +Working under Windows is not recommended. + Checking out the sources ------------------------ @@ -28,6 +35,6 @@ But to get a basic installation, just the following steps are necessary. ./scripts/build-toolchains.sh esp-tools # for a modified risc-v toolchain with Hwacha vector instructions -Once the script is run, a ``env.sh`` file is emitted at sets the ``PATH``, ``RISCV``, and ``LD_LIBRARY_PATH`` environment variables. +Once the script is run, a ``env.sh`` file is emitted that sets the ``PATH``, ``RISCV``, and ``LD_LIBRARY_PATH`` environment variables. You can put this in your ``.bashrc`` or equivalent environment setup file to get the proper variables. These variables need to be set for the make system to work properly. diff --git a/docs/Chipyard-Basics/index.rst b/docs/Chipyard-Basics/index.rst index be46c627..3c05c864 100644 --- a/docs/Chipyard-Basics/index.rst +++ b/docs/Chipyard-Basics/index.rst @@ -20,5 +20,3 @@ Hit next to get started! Chipyard-Components Configs-Parameters-Mixins Initial-Repo-Setup - Running-A-Simulation - Building-A-Chip diff --git a/docs/Customization/Firrtl-Transforms.rst b/docs/Customization/Firrtl-Transforms.rst new file mode 100644 index 00000000..808082e0 --- /dev/null +++ b/docs/Customization/Firrtl-Transforms.rst @@ -0,0 +1,97 @@ +.. 
_firrtl-transforms: + +Adding a Firrtl Transform +========================= + +Similar to how LLVM IR passes can perform transformations and optimizations on software, FIRRTL transforms can +modify Chisel-elaborated RTL. +As mentioned in Section :ref:`firrtl`, transforms are modifications that happen on the FIRRTL IR that can modify a circuit. +Transforms are a powerful tool to take in the FIRRTL IR that is emitted from Chisel and run analysis or convert the circuit into a new form. + +Where to add transforms +----------------------- + +In Chipyard, the FIRRTL compiler is called multiple times to create a "Top" file that contains the DUT and a "Harness" file containing the test harness, which instantiates the DUT. +The "Harness" file does not contain the DUT's module definition or any of its submodules. +This is done by the ``tapeout`` SBT project (located in ``tools/barstools/tapeout``) which calls ``GenerateTopAndHarness`` (a function that wraps the multiple FIRRTL compiler calls and extra transforms). + +.. literalinclude:: ../../common.mk + :language: make + :start-after: DOC include start: FirrtlCompiler + :end-before: DOC include end: FirrtlCompiler + +If you look inside of the `tools/barstools/tapeout/src/main/scala/transforms/Generate.scala `__ file, +you can see that FIRRTL is invoked twice, once for the "Top" and once for the "Harness". If you want to add transforms to just modify the DUT, you can add them to ``topTransforms``. +Otherwise, if you want to add transforms to just modify the test harness, you can add them to ``harnessTransforms``. + +For more information on Barstools, please visit the :ref:`Barstools` section. + +Examples of transforms +---------------------- + +There are multiple examples of transforms that you can apply and are spread across the FIRRTL ecosystem. +Within FIRRTL there is a default set of supported transforms located in https://github.com/freechipsproject/firrtl/tree/master/src/main/scala/firrtl/transforms. 
+This includes transforms that can flatten modules (``Flatten``), group modules together (``GroupAndDedup``), and more. + +Transforms can be standalone or can take annotations as input. Annotations are used to pass information between FIRRTL transforms. This includes information on +what modules to flatten, group, and more. Annotations can be added to the code by +adding them to your Chisel source or by creating a serialized annotation ``json`` file and adding it to the FIRRTL compiler +(note: annotating the Chisel source will automatically serialize the annotation as a ``json`` snippet into the build system for you). +**The recommended way to annotate something is to do it in the Chisel source, but not all annotation types have Chisel APIs**. + +The example below shows two ways to annotate the signal using the ``DontTouchAnnotation`` +(makes sure that a particular signal is not removed by the "Dead Code Elimination" pass in FIRRTL): + +* use the Chisel API/wrapper function called ``dontTouch`` that does this automatically for you (more `dontTouch `__ information): +* directly annotate the signal with the ``annotate`` function and the ``DontTouchAnnotation`` class if there is no Chisel API for it (note: most FIRRTL annotations have Chisel APIs for them) + +.. code-block:: scala + + class TopModule extends Module { + ... + val submod = Module(new Submodule) + ... + } + + class Submodule extends Module { + ... + val some_signal = ... + + // MAIN WAY TO USE `dontTouch` + // how to annotate if there is a Chisel API/wrapper + chisel3.dontTouch(some_signal) + + // how to annotate WITHOUT a Chisel API/wrapper + annotate(new ChiselAnnotation { + def toFirrtl = DontTouchAnnotation(some_signal.toNamed) + }) + + ... + } + +Here is an example of the ``DontTouchAnnotation`` when it is serialized: + +.. 
code-block:: json + + [ + { + "class": "firrtl.transforms.DontTouchAnnotation", + "target": "~TopModule|Submodule>some_signal" + } + ] + +In this case, the specific syntax depends on the type of annotation and its fields. +One of the easier ways to figure out the serialized syntax is to first try and find a Chisel +annotation to add to the code. Then you can look at the collateral that is generated from the +build system, find the ``*.anno.json``, and find the proper syntax for the annotation. + +Once ``yourAnnoFile.json`` is created then you can add ``-faf yourAnnoFile.json`` to the FIRRTL +compiler invocation in ``common.mk``. + +.. literalinclude:: ../../common.mk + :language: make + :start-after: DOC include start: FirrtlCompiler + :end-before: DOC include end: FirrtlCompiler + +If you are interested in writing FIRRTL transforms please refer to the FIRRTL documentation located here: +https://github.com/freechipsproject/firrtl/wiki. diff --git a/docs/Customization/Incorporating-Verilog-Blocks.rst b/docs/Customization/Incorporating-Verilog-Blocks.rst new file mode 100644 index 00000000..64f064f8 --- /dev/null +++ b/docs/Customization/Incorporating-Verilog-Blocks.rst @@ -0,0 +1,186 @@ +.. _incorporating-verilog-blocks: + +Incorporating Verilog Blocks +============================ + +Working with existing Verilog IP is an integral part of many chip +design flows. Fortunately, both Chisel and Chipyard provide extensive +support for Verilog integration. + +Here, we will examine the process of incorporating an MMIO peripheral +(similar to the PWM example from the previous section) that uses a +Verilog implementation of the Greatest Common Divisor (GCD) +algorithm. 
There are a few steps to adding a Verilog peripheral: + +* Adding a Verilog resource file to the project +* Defining a Chisel ``BlackBox`` representing the Verilog module +* Instantiating the ``BlackBox`` and interfacing ``RegField`` entries +* Setting up a chip ``Top`` and ``Config`` that use the peripheral + +Adding a Verilog Blackbox Resource File +--------------------------------------- + +As before, it is possible to incorporate peripherals as part of your +own generator project. However, Verilog resource files must go in a +different directory from Chisel (Scala) sources. + +.. code-block:: none + + generators/yourproject/ + build.sbt + src/main/ + scala/ + resources/ + vsrc/ + YourFile.v + +In addition to the steps outlined in the previous section on adding a +project to the ``build.sbt`` at the top level, it is also necessary to +add any projects that contain Verilog IP as dependencies to the +``tapeout`` project. This ensures that the Verilog sources are visible +to the downstream FIRRTL passes that provide utilities for integrating +Verilog files into the build process, which are part of the +``tapeout`` package in ``barstools/tapeout``. + +.. code-block:: scala + + lazy val tapeout = conditionalDependsOn(project in file("./tools/barstools/tapeout/")) + .dependsOn(chisel_testers, example, yourproject) + .settings(commonSettings) + +For this concrete GCD example, we will be using a ``GCDMMIOBlackBox`` +Verilog module that is defined in the ``example`` project. The Scala +and Verilog sources follow the prescribed directory layout. + +.. code-block:: none + + generators/example/ + build.sbt + src/main/ + scala/ + GCDMMIOBlackBox.scala + resources/ + vsrc/ + GCDMMIOBlackBox.v + +Defining a Chisel BlackBox +-------------------------- + +A Chisel ``BlackBox`` module provides a way of instantiating a module +defined by an external Verilog source. 
The definition of the blackbox +includes several aspects that allow it to be translated to an instance +of the Verilog module: + +* An ``io`` field: a bundle with fields corresponding to the portlist of the Verilog module. +* A constructor parameter that takes a ``Map`` from Verilog parameter name to elaborated value +* One or more resources added to indicate Verilog source dependencies + +Of particular interest is the fact that parameterized Verilog modules +can be passed the full space of possible parameter values. These +values may depend on elaboration-time values in the Chisel generator, +as the bitwidth of the GCD calculation does in this example. + +**Verilog GCD port list and parameters** + +.. literalinclude:: ../../generators/example/src/main/resources/vsrc/GCDMMIOBlackBox.v + :language: verilog + :start-after: DOC include start: GCD portlist + :end-before: DOC include end: GCD portlist + +**Chisel BlackBox Definition** + +.. literalinclude:: ../../generators/example/src/main/scala/GCDMMIOBlackBox.scala + :language: scala + :start-after: DOC include start: GCD blackbox + :end-before: DOC include end: GCD blackbox + +Instantiating the BlackBox and Defining MMIO +-------------------------------------------- + +Next, we must instantiate the blackbox. In order to take advantage of +diplomatic memory mapping on the system bus, we still have to +integrate the peripheral at the Chisel level by mixing +peripheral-specific traits into a ``TLRegisterRouter``. The ``params`` +member and ``HasRegMap`` base trait should look familiar from the +previous memory-mapped PWM device example. + +.. literalinclude:: ../../generators/example/src/main/scala/GCDMMIOBlackBox.scala + :language: scala + :start-after: DOC include start: GCD instance regmap + :end-before: DOC include end: GCD instance regmap + +Advanced Features of RegField Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One significant difference from the PWM example is in the peripheral's +memory map. 
``RegField`` exposes polymorphic ``r`` and ``w`` methods +that allow read- and write-only memory-mapped registers to be +interfaced to hardware in multiple ways. + +* ``RegField.r(2, status)`` is used to create a 2-bit, read-only register that captures the current value of the ``status`` signal when read. +* ``RegField.r(params.width, gcd)`` "connects" the decoupled handshaking interface ``gcd`` to a read-only memory-mapped register. When this register is read via MMIO, the ``ready`` signal is asserted. This is in turn connected to ``output_ready`` on the Verilog blackbox through the glue logic. +* ``RegField.w(params.width, x)`` exposes a plain register (much like those in the PWM example) via MMIO, but makes it write-only. +* ``RegField.w(params.width, y)`` associates the decoupled interface signal ``y`` with a write-only memory-mapped register, causing ``y.valid`` to be asserted when the register is written. + +Since the ready/valid signals of ``y`` are connected to the +``input_ready`` and ``input_valid`` signals of the blackbox, +respectively, this register map and glue logic has the effect of +triggering the GCD algorithm when ``y`` is written. Therefore, the +algorithm is set up by first writing ``x`` and then performing a +triggering write to ``y``. Polling can be used for status checks. + +Defining a Chip with a GCD Peripheral +--------------------------------------- + +As with the PWM example, a few more pieces are needed to tie the system together. + +**Composing traits into a complete cake pattern peripheral** + +.. literalinclude:: ../../generators/example/src/main/scala/GCDMMIOBlackBox.scala + :language: scala + :start-after: DOC include start: GCD cake + :end-before: DOC include end: GCD cake + +Note the differences arising due to the fact that this peripheral has +no top-level IO. To build a complete system, a new ``Top`` and new +``Config`` objects are added in a manner exactly analogous to the PWM +example. 
+ +Software Testing +---------------- + +The GCD module has a slightly more complex interface, so polling is +used to check the status of the device before each triggering read or +write. + +.. literalinclude:: ../../tests/gcd.c + :language: c + :start-after: DOC include start: GCD test + :end-before: DOC include end: GCD test + +Support for Verilog Within Chipyard Tool Flows +---------------------------------------------- + +There are important differences in how Verilog blackboxes are treated +by various flows within the Chipyard framework. Some flows within +Chipyard rely on FIRRTL in order to provide robust, non-invasive +transformations of source code. Since Verilog blackboxes remain +blackboxes in FIRRTL, their ability to be processed by FIRRTL +transforms is limited, and some advanced features of Chipyard may +provide weaker support for blackboxes. Note that the remainder of the +design (the "non-Verilog" part of the design) may still generally be +transformed or augmented by any Chipyard FIRRTL transform. + +* Verilog blackboxes are fully supported for generating tapeout-ready RTL +* HAMMER workflows offer robust support for integrating Verilog blackboxes +* FireSim relies on FIRRTL transformations to generate a decoupled + FPGA simulator. Therefore, support for Verilog blackboxes in FireSim + is currently limited but rapidly evolving. Stay tuned! 
+* Custom FIRRTL transformations and analyses may sometimes be able to + handle blackbox Verilog, depending on the mechanism of the + particular transform + +As mentioned earlier in this section, ``BlackBox`` resource files must +be integrated into the build process, so any project providing +``BlackBox`` resources must be made visible to the ``tapeout`` project +in ``build.sbt`` diff --git a/docs/Customization/Memory-Hierarchy.rst b/docs/Customization/Memory-Hierarchy.rst index 39955310..207e0775 100644 --- a/docs/Customization/Memory-Hierarchy.rst +++ b/docs/Customization/Memory-Hierarchy.rst @@ -1,3 +1,5 @@ +.. _memory-hierarchy: + Memory Hierarchy =============================== diff --git a/docs/Customization/index.rst b/docs/Customization/index.rst index c0432e73..cd52a499 100644 --- a/docs/Customization/index.rst +++ b/docs/Customization/index.rst @@ -15,5 +15,7 @@ Hit next to get started! Heterogeneous-SoCs Adding-An-Accelerator + Incorporating-Verilog-Blocks Memory-Hierarchy Boot-Process + Firrtl-Transforms diff --git a/docs/Generators/BOOM.rst b/docs/Generators/BOOM.rst index 79cc0a54..38b5425d 100644 --- a/docs/Generators/BOOM.rst +++ b/docs/Generators/BOOM.rst @@ -1,6 +1,8 @@ Berkeley Out-of-Order Machine (BOOM) ============================================== +.. image:: ../_static/images/boom-pipeline-detailed.png + The `Berkeley Out-of-Order Machine (BOOM) `__ is a synthesizable and parameterizable open source RV64GC RISC-V core written in the Chisel hardware construction language. It serves as a drop-in replacement to the Rocket core given by Rocket Chip (replaces the RocketTile with a BoomTile). BOOM is heavily inspired by the MIPS R10k and the Alpha 21264 out-of-order processors. 
diff --git a/docs/Generators/Hwacha.rst b/docs/Generators/Hwacha.rst index d040d35d..240d6fc9 100644 --- a/docs/Generators/Hwacha.rst +++ b/docs/Generators/Hwacha.rst @@ -2,7 +2,14 @@ Hwacha ==================================== The Hwacha project is developing a new vector architecture for future computer systems that are constrained in their power and energy consumption. -Inspired by traditional vector machines from the 70s and 80s, and lessons learned from our previous vector-thread architectures Scale and Maven, we are bringing back elegant, performant, and energy-efficient aspects of vector processing to modern data-parallel architectures. -We propose a new vector-fetch architectural paradigm, which focuses on the following aspects for higher performance, better energy efficiency, and lower complexity. +The Hwacha project is inspired by traditional vector machines from the 70s and 80s, and lessons learned from our previous vector-thread architectures such as Scale and Maven. +The Hwacha project includes the Hwacha microarchitecture generator, as well as the ``XHwacha`` non-standard RISC-V extension. Hwacha does not implement the RISC-V standard vector extension proposal. -For more information, please visit the `Hwacha website `__. +For more information on the Hwacha project, please visit the `Hwacha website `__. + +To add the Hwacha vector unit to an SoC, you should add the ``hwacha.DefaultHwachaConfig`` config mixin to the SoC configurations. The Hwacha vector unit uses the RoCC port of a Rocket or BOOM `tile`, and by default connects to the memory system through the `System Bus` (i.e., directly to the L2 cache). + +To change the configuration of the Hwacha vector unit, you can write a custom configuration to replace the ``DefaultHwachaConfig``. You can view the ``DefaultHwachaConfig`` under `generators/hwacha/src/main/scala/configs.scala `__ to see the possible configuration parameters. 
+ +Since Hwacha implements a non-standard RISC-V extension, it requires a unique software toolchain to be able to compile and assemble its vector instructions. +To install the Hwacha toolchain, run the ``./scripts/build-toolchains.sh esp-tools`` command within the root Chipyard directory. This may take a while, and it will install the ``esp-tools-install`` directory within your Chipyard root directory. ``esp-tools`` is a fork of ``riscv-tools`` (formerly a collection of relevant software RISC-V tools) that was enhanced with additional non-standard vector instructions. However, due to the upstreaming of the equivalent RISC-V toolchains, ``esp-tools`` may not be up-to-date with the latest mainline version of the tools included in it. diff --git a/docs/Generators/IceNet.rst b/docs/Generators/IceNet.rst new file mode 100644 index 00000000..b520eb6c --- /dev/null +++ b/docs/Generators/IceNet.rst @@ -0,0 +1,87 @@ +IceNet +====== + +IceNet is a library of Chisel designs related to networking. The main component +of IceNet is IceNIC, a network interface controller that is used primarily +in `FireSim `_ for multi-node networked simulation. +A diagram of IceNet's microarchitecture is shown below. + +.. image:: ../_static/images/nic-design.png + +There are four basic parts of the NIC: the :ref:`Controller`, which takes requests +from and sends responses to the CPU; the :ref:`Send Path`, which reads data from +memory and sends it out to the network; the :ref:`Receive Path`, which receives +data from the network and writes it to memory; and, optionally, +the :ref:`Pause Handler`, which generates Ethernet pause frames for the purpose +of flow control. + +Controller +---------- + +The controller exposes a set of MMIO registers to the CPU. The device driver +writes to registers to request that packets be sent or to provide memory +locations to write received data to. 
Upon the completion of a send request or +packet receive, the controller sends an interrupt to the CPU, which clears +the completion by reading from another register. + +Send Path +--------- + +The send path begins at the reader, which takes requests from the controller +and reads the data from memory. + +Since TileLink responses can come back out-of-order, we use a reservation +queue to reorder responses so that the packet data can be sent out in the +proper order. + +The packet data then goes to an arbiter, which can arbitrate access to the +outbound network interface between the NIC and one or more "tap in" interfaces, +which come from other hardware modules that may want to send Ethernet packets. +By default, there are no tap in interfaces, so the arbiter simply passes +the output of the reservation buffer through. + +Receive Path +------------ + +The receive path begins with the packet buffer, which buffers data coming +in from the network. If there is insufficient space in the buffer, it will +drop data at packet granularity to ensure that the NIC does not deliver +incomplete packets. + +From the packet buffer, the data can optionally go to a network tap, which +examines the Ethernet header and selects packets to be redirected from the NIC +to external modules through one or more "tap out" interfaces. By default, there +are no tap out interfaces, so the data will instead go directly to the writer, +which writes the data to memory and then sends a completion to the controller. + +Pause Handler +------------- + +IceNIC can be configured to have a pause handler, which sits between the +send and receive paths and the Ethernet interface. This module tracks the +occupancy of the receive packet buffer. If it sees the buffer filling up, it +will send an `Ethernet pause frame `_ +out to the network to block further packets from being sent. If the NIC receives +an Ethernet pause frame, the pause handler will block sending from the NIC. 
+ +Linux Driver +------------ + +The default Linux configuration provided by `firesim-software `_ +contains an IceNet driver. If you launch a FireSim image that has IceNIC on it, +the driver will automatically detect the device, and you will be able to use +the full Linux networking stack in userspace. + +Configuration +------------- + +To add IceNIC to your design, add ``HasPeripheryIceNIC`` to your lazy module +and ``HasPeripheryIceNICModuleImp`` to the module implementation. If you +are confused about the distinction between lazy module and module +implementation, refer to :ref:`Cake Pattern`. + +Then add the ``WithIceNIC`` config mixin to your configuration. This will +define ``NICKey``, which IceNIC uses to determine its parameters. The mixin +takes two arguments. The ``inBufFlits`` argument is the number of 64-bit flits +that the input packet buffer can hold and the ``usePauser`` argument determines +whether or not the NIC will have a pause handler. diff --git a/docs/Generators/RocketChip.rst b/docs/Generators/Rocket-Chip.rst similarity index 82% rename from docs/Generators/RocketChip.rst rename to docs/Generators/Rocket-Chip.rst index b6050534..8ef12746 100644 --- a/docs/Generators/RocketChip.rst +++ b/docs/Generators/Rocket-Chip.rst @@ -1,15 +1,15 @@ -RocketChip -========== +Rocket Chip +=========== -RocketChip is an SoC generator developed at Berkeley and now supported by -SiFive. Chipyard uses RocketChip as the basis for producing a RISC-V SoC. +Rocket Chip generator is an SoC generator developed at Berkeley and now supported by +SiFive. Chipyard uses the Rocket Chip generator as the basis for producing a RISC-V SoC. -RocketChip is distinct from Rocket, the in-order RISC-V CPU generator. -RocketChip includes many parts of the SoC besides the CPU. Though RocketChip -uses Rocket CPUs by default, it can also be configured to use the BOOM +`Rocket Chip` is distinct from `Rocket core`, the in-order RISC-V CPU generator. 
+Rocket Chip includes many parts of the SoC besides the CPU. Though Rocket Chip +uses Rocket core CPUs by default, it can also be configured to use the BOOM out-of-order core generator or some other custom CPU generator instead. -A detailed diagram of a typical RocketChip system is shown below. +A detailed diagram of a typical Rocket Chip system is shown below. .. image:: ../_static/images/rocketchip-diagram.png diff --git a/docs/Generators/Rocket.rst b/docs/Generators/Rocket.rst index 401b9e36..6c55b761 100644 --- a/docs/Generators/Rocket.rst +++ b/docs/Generators/Rocket.rst @@ -1,8 +1,9 @@ -Rocket +Rocket Core ==================================== -`Rocket `__ is a 5-stage in-order scalar core generator that is supported by `SiFive `__. -It supports the open source RV64GC RISC-V instruction set and is written in the Chisel hardware construction language. +`Rocket `__ is a 5-stage in-order scalar processor core generator, originally developed at UC Berkeley and currently supported by `SiFive `__. The `Rocket core` is used as a component within the `Rocket Chip SoC generator`. A Rocket core combined with L1 caches (data and instruction caches) form a `Rocket tile`. The `Rocket tile` is the replicable component of the `Rocket Chip SoC generator`. + +The Rocket core supports the open-source RV64GC RISC-V instruction set and is written in the Chisel hardware construction language. It has an MMU that supports page-based virtual memory, a non-blocking data cache, and a front-end with branch prediction. Branch prediction is configurable and provided by a branch target buffer (BTB), branch history table (BHT), and a return address stack (RAS). For floating-point, Rocket makes use of Berkeley’s Chisel implementations of floating-point units. 
diff --git a/docs/Generators/SHA3.rst b/docs/Generators/SHA3.rst new file mode 100644 index 00000000..d48b3017 --- /dev/null +++ b/docs/Generators/SHA3.rst @@ -0,0 +1,80 @@ +SHA3 RoCC Accelerator +=================================== +The SHA3 accelerator is a basic RoCC accelerator for the SHA3 hashing algorithm. +We like using SHA3 in Chipyard tutorial content because it is a self-contained, simple +example of integrating a custom accelerator into Chipyard. + + +Introduction +----------------------------------- +Secure hashing algorithms represent a class of hashing functions that provide four attributes: ease +of hash computation, inability to generate the message from the hash (one-way property), inability +to change the message and not the hash (weakly collision free property), and inability to find +two messages with the same hash (strongly collision free property). The National Institute of +Standards and Technology (NIST) recently held a competition for a new algorithm to be added to +its set of Secure Hashing Algorithms (SHA). In 2012 the winner was determined to be the Keccak +hashing function and a rough specification for SHA3 was established. The algorithm operates on +variable length messages with a sponge function, and thus alternates between absorbing chunks of +the message into a set of state bits and permuting the state. The absorbing is a simple bitwise +XOR while the permutation is a more complex function composed of several operations, χ, θ, ρ, +π, ι, that all perform various bitwise operations, including rotations, parity calculations, XORs, +etc. The Keccak hashing function is parameterized for different sizes of state and message chunks +but for this accelerator we will only support the Keccak-256 variant with 1600 bits of state and +1088 bit message chunks. A diagram of the SHA3 accelerator is shown below. + +.. 
image:: ../_static/images/sha3.png + +Technical Details +------------------------------------ +The accelerator is designed around three sub-systems, an +interface with the processor, an interface with memory, and +the actual hashing computation system. The interface +with the processor is designed using the RoCC interface for +coprocessors integrating with the RISC-V Rocket/BOOM +processor. It includes the ability to transfer two 64 bit +words to the co-processor, the request for a return value, +and a small field for the function requested. The accelerator +receives these requests using a ready/valid interface. The +RoCC instruction is parsed and the needed information is +stored into an execution context. The execution context contains +the memory address of the message being hashed, the memory address +to store the resulting hash in, the length of the message, and +several other control fields. + +Once the execution context is valid the memory subsystem +then begins to fetch chunks of the message. The memory +subsystem is fully decoupled from the other subsystems +and maintains a single full round memory buffers. +The accelerator's memory interface can provide a +maximum of one 64 bit word per cycle which corresponds +to 17 requests needed to fill a buffer (the size is dictated by +the SHA3 algorithm). Memory requests to fill these buffers +are sent out as rapidly as the memory interface can handle, +with a tag field set to allow the different memory buffers +requests to be distinguished, as they may be returned out of +order. Once the memory subsystem has filled a buffer the +control unit absorbs the buffer into the execution +context, at which point the execution context is free to +begin permutation, and the memory buffer is free to send +more memory requests. + +After the buffer is absorbed, the hashing computation +subsystem begins the permutation operations. Once +the message is fully hashed, the hash is written to memory +with a simple state machine. 
+ + +Using a SHA3 Accelerator +------------------------ +Since the SHA3 accelerator is designed as a RoCC accelerator, +it can be mixed into a Rocket or BOOM core by overriding the +BuildRoCC key. The configuration mixin is defined in the SHA3 +generator. An example configuration highlighting the use of +this mixin is shown here: + +.. literalinclude:: ../../generators/example/src/main/scala/RocketConfigs.scala + :language: scala + :start-after: DOC include start: Sha3Rocket + :end-before: DOC include end: Sha3Rocket + + diff --git a/docs/Generators/SiFive-Generators.rst b/docs/Generators/SiFive-Generators.rst new file mode 100644 index 00000000..8f2202b0 --- /dev/null +++ b/docs/Generators/SiFive-Generators.rst @@ -0,0 +1,45 @@ +SiFive Generators +================== + +Chipyard includes several open-source generators developed and maintained by `SiFive `__. +These are currently organized within two submodules named ``sifive-blocks`` and ``sifive-cache``. + +Last-Level Cache Generator +----------------------------- + +``sifive-cache`` includes a last-level cache generator. The Chipyard framework uses this last-level cache as an L2 cache. To use this L2 cache, you should add the ``freechips.rocketchip.subsystem.WithInclusiveCache`` mixin to your SoC configuration. +To learn more about configuring this L2 cache, please refer to the :ref:`memory-hierarchy` section. + + +Peripheral Devices +------------------- +``sifive-blocks`` includes multiple peripheral device generators, such as UART, SPI, PWM, JTAG, GPIO and more. + +These peripheral devices usually affect the memory map of the SoC, and its top-level IO as well. +To integrate one of these devices in your SoC, you will need to define a custom mixin with the appropriate address for the device using the Rocket Chip parameter system. As an example, for a GPIO device you could add the following mixin to set the GPIO address to ``0x10012000``. This address is the start address for the GPIO configuration registers. + +.. 
literalinclude:: ../../generators/example/src/main/scala/ConfigMixins.scala + :language: scala + :start-after: DOC include start: WithGPIO + :end-before: DOC include end: WithGPIO + +Additionally, if the device requires top-level IOs, you will need to define a mixin to change the top-level configuration of your SoC. +When adding a top-level IO, you should also be aware of whether it interacts with the test-harness. +For example, a GPIO device would require a GPIO pin, and therefore we would write a mixin to augment the top level as follows: + +.. literalinclude:: ../../generators/example/src/main/scala/ConfigMixins.scala + :language: scala + :start-after: DOC include start: WithGPIOTop + :end-before: DOC include end: WithGPIOTop + +This example instantiates a top-level module that includes GPIO ports (``TopWithGPIO``), and then ties off the GPIO port inputs to 0 (``false.B``). + + +Finally, you add the relevant config mixin to the SoC config. For example: + +.. literalinclude:: ../../generators/example/src/main/scala/RocketConfigs.scala + :language: scala + :start-after: DOC include start: GPIORocketConfig + :end-before: DOC include end: GPIORocketConfig + +Some of the devices in ``sifive-blocks`` (such as GPIO) may already have pre-defined mixins within the Chipyard example project. You may be able to use these config mixins directly, but you should be aware of their addresses within the SoC address map. diff --git a/docs/Generators/TestChipIP.rst b/docs/Generators/TestChipIP.rst new file mode 100644 index 00000000..7d490c5f --- /dev/null +++ b/docs/Generators/TestChipIP.rst @@ -0,0 +1,63 @@ +Test Chip IP +============ + +Chipyard includes a Test Chip IP library which provides various hardware +widgets that may be useful when designing SoCs. This includes a :ref:`Serial Adapter`, +:ref:`Block Device Controller`, :ref:`TileLink SERDES`, and :ref:`TileLink Switcher`. 
+ +Serial Adapter +-------------- + +The serial adapter is used by tethered test chips to communicate with the host +processor. An instance of RISC-V frontend server running on the host CPU +can send commands to the serial adapter to read and write data from the memory +system. The frontend server uses this functionality to load the test program +into memory and to poll for completion of the program. More information on +this can be found in :ref:`Chipyard Boot Process`. + +Block Device Controller +----------------------- + +The block device controller provides a generic interface for secondary storage. +This device is primarily used in FireSim to interface with a block device +software simulation model. The default Linux configuration in `firesim-software `_ + +To add a block device to your design, add ``HasPeripheryBlockDevice`` to your +lazy module and ``HasPeripheryBlockDeviceModuleImp`` to the implementation. +Then add the ``WithBlockDevice`` config mixin to your configuration. + + +TileLink SERDES +--------------- + +The TileLink SERDES in the Test Chip IP library allow TileLink memory requests +to be serialized so that they can be carried off chip through a serial link. +The five TileLink channels are multiplexed over two SERDES channels, one in +each direction. + +There are three different variants provided by the library, ``TLSerdes`` +exposes a manager interface to the chip, tunnels A, C, and E channels on +its outbound link, and tunnels B and D channels on its inbound link. ``TLDesser`` +exposes a client interface to the chip, tunnels A, C, and E on its inbound link, +and tunnels B and D on its outbound link. Finally, ``TLSerdesser`` exposes +both client and manager interface to the chip and can tunnel all channels in +both directions. + +For an example of how to use the SERDES classes, take a look at the +``SerdesTest`` unit test in `the Test Chip IP unit test suite +`_. 
+ +TileLink Switcher +----------------- + +The TileLink switcher is used when the chip has multiple possible memory +interfaces and you would like to select which channels to map your memory +requests to at boot time. It exposes a client node, multiple manager nodes, +and a select signal. Depending on the setting of the select signal, requests +from the client node will be directed to one of the manager nodes. +The select signal must be set before any TileLink messages are sent and be +kept stable throughout the remainder of operation. It is not safe to change +the select signal once TileLink messages have begun sending. + +For an example of how to use the switcher, take a look at the ``SwitcherTest`` +unit test in the `Test Chip IP unit tests `_. diff --git a/docs/Generators/index.rst b/docs/Generators/index.rst index a147ffeb..462c20e2 100644 --- a/docs/Generators/index.rst +++ b/docs/Generators/index.rst @@ -1,17 +1,28 @@ +.. _generator-index: + Generators ============================ -Generator can be thought of as a generalized RTL design, written using a mix of meta-programming and standard RTL. +A Generator can be thought of as a generalized RTL design, written using a mix of meta-programming and standard RTL. This type of meta-programming is enabled by the Chisel hardware description language (see :ref:`Chisel`). A standard RTL design is essentially just a single instance of a design coming from a generator. However, by using meta-programming and parameter systems, generators can allow for integration of complex hardware designs in automated ways. The following pages introduce the generators integrated with the Chipyard framework. +Chipyard bundles the source code for the generators, under the ``generators`` directory. 
+It builds them from source each time (although the build system will cache results if they have not changed), +so changes to the generators themselves will automatically be used when building with Chipyard and propagate to software simulation, FPGA-accelerated simulation, and VLSI flows. + + .. toctree:: :maxdepth: 2 :caption: Generators: + Rocket-Chip Rocket BOOM Hwacha - RocketChip + IceNet + TestChipIP + SiFive-Generators + SHA3 diff --git a/docs/Quick-Start.rst b/docs/Quick-Start.rst index 936597e6..542e3a30 100644 --- a/docs/Quick-Start.rst +++ b/docs/Quick-Start.rst @@ -1,6 +1,13 @@ Quick Start =============================== +Requirements +------------------------------------------- + +Chipyard is developed and tested on Linux-based systems. +It is possible to use this on macOS or other BSD-based systems, although GNU tools will need to be installed; it is also recommended to install the RISC-V toolchain from ``brew``. +Working under Windows is not recommended. + Setting up the Chipyard Repo ------------------------------------------- @@ -28,22 +35,29 @@ To build the toolchains, you should run: .. Note:: If you are planning to use the Hwacha vector unit, or other RoCC-based accelerators, you should build the esp-tools toolchains by adding the ``esp-tools`` argument to the script above. If you are running on an Amazon Web Services EC2 instance, intending to use FireSim, you can also use the ``--ec2fast`` flag for an expedited installation of a pre-compiled toolchain. +Finally, set up Chipyard's environment variables and put the newly built toolchain on your path: + +.. code-block:: shell + + source ./env.sh What's Next? ------------------------------------------- This depends on what you are planning to do with Chipyard. -* If you want to learn about the structure of Chipyard, go to :ref:`chipyard-components`. - -* If you intend to build one of the vanilla Chipyard examples, go to :ref:`build-a-chip` and follow the instructions. 
- -* If you intend to add a new accelerator, go to :ref:`adding-an-accelerator` and follow the instructions. - * If you intend to run a simulation of one of the vanilla Chipyard examples, go to :ref:`sw-rtl-sim-intro` and follow the instructions. -* If you intend to run a simulation of a custom Chipyard SoC Configuration, go to <> and follow the instructions. +* If you intend to run a simulation of a custom Chipyard SoC Configuration, go to :ref:`Simulating A Custom Project` and follow the instructions. * If you intend to run a full-system FireSim simulation, go to :ref:`firesim-sim-intro` and follow the instructions. +* If you intend to add a new accelerator, go to :ref:`adding-an-accelerator` and follow the instructions. + +* If you want to learn about the structure of Chipyard, go to :ref:`chipyard-components`. + +* If you intend to change the generators (BOOM, Rocket, etc) themselves, see :ref:`generator-index`. + * If you intend to run a VLSI flow using one of the vanilla Chipyard examples, go to <> and follow the instructions. + +* If you intend to build a chip using one of the vanilla Chipyard examples, go to :ref:`build-a-chip` and follow the instructions. diff --git a/docs/Chipyard-Basics/Running-A-Simulation.rst b/docs/Simulation/Running-A-Simulation.rst similarity index 99% rename from docs/Chipyard-Basics/Running-A-Simulation.rst rename to docs/Simulation/Running-A-Simulation.rst index 76eb0acb..a4346ed5 100644 --- a/docs/Chipyard-Basics/Running-A-Simulation.rst +++ b/docs/Simulation/Running-A-Simulation.rst @@ -39,13 +39,21 @@ In order to construct the simulator with our custom design, we run the following make SBT_PROJECT=... MODEL=... VLOG_MODEL=... MODEL_PACKAGE=... CONFIG=... CONFIG_PACKAGE=... GENERATOR_PACKAGE=... TB=... TOP=... Each of these make variables correspond to a particular part of the design/codebase and are needed so that the make system can correctly build and make a RTL simulation. 
+ The ``SBT_PROJECT`` is the ``build.sbt`` project that holds all of the source files and that will be run during the RTL build. + The ``MODEL`` and ``VLOG_MODEL`` are the top-level class names of the design. + Normally, these are the same, but in some cases these can differ (if the Chisel class differs than what is emitted in the Verilog). + The ``MODEL_PACKAGE`` is the Scala package (in the Scala code that says ``package ...``) that holds the ``MODEL`` class. + The ``CONFIG`` is the name of the class used for the parameter Config while the ``CONFIG_PACKAGE`` is the Scala package it resides in. + The ``GENERATOR_PACKAGE`` is the Scala package that holds the Generator class that elaborates the design. + The ``TB`` is the name of the Verilog wrapper that connects the ``TestHarness`` to VCS/Verilator for simulation. + Finally, the ``TOP`` variable is used to distinguish between the top-level of the design and the ``TestHarness`` in our system. For example, in the normal case, the ``MODEL`` variable specifies the ``TestHarness`` as the top-level of the design. However, the true top-level design, the SoC being simulated, is pointed to by the ``TOP`` variable. diff --git a/docs/Simulation/Software-RTL-Simulation.rst b/docs/Simulation/Software-RTL-Simulation.rst new file mode 100644 index 00000000..596c47b1 --- /dev/null +++ b/docs/Simulation/Software-RTL-Simulation.rst @@ -0,0 +1,140 @@ +.. _sw-rtl-sim-intro: + +Software RTL Simulation +=================================== + +Verilator (Open-Source) +----------------------- + +`Verilator `__ is an open-source LGPL-Licensed simulator maintained by `Veripool `__. +The Chipyard framework can download, build, and execute simulations using Verilator. + + +Synopsys VCS (License Required) +-------------------------------- + +`VCS `__ is a commercial RTL simulator developed by Synopsys. +It requires commercial licenses. +The Chipyard framework can compile and execute simulations using VCS. 
+VCS simulation will generally compile faster than Verilator simulations. + +To run a VCS simulation, make sure that the VCS simulator is on your ``PATH``. + + +Choice of Simulator +------------------------------- + +First, we will start by entering the Verilator or VCS directory: + +For an open-source Verilator simulation, enter the ``sims/verilator`` directory + +.. code-block:: shell + + # Enter Verilator directory + cd sims/verilator + +For a proprietary VCS simulation, enter the ``sims/vcs`` directory + +.. code-block:: shell + + # Enter VCS directory + cd sims/vcs + + +.. _sim-default: + +Simulating The Default Example +------------------------------- + +To compile the example design, run ``make`` in the selected verilator or VCS directory. +This will elaborate the ``RocketConfig`` in the example project. + +An executable called ``simulator-example-RocketConfig`` will be produced. +This executable is a simulator that has been compiled based on the design that was built. +You can then use this executable to run any compatible RV64 code. +For instance, to run one of the riscv-tools assembly tests. + +.. code-block:: shell + + ./simulator-example-RocketConfig $RISCV/riscv64-unknown-elf/share/riscv-tests/isa/rv64ui-p-simple + +.. Note:: in a VCS simulator, the simulator name will be ``simv-example-RocketConfig`` instead of ``simulator-example-RocketConfig``. + +Alternatively, we can run a pre-packaged suite of RISC-V assembly or benchmark tests, by adding the make target ``run-asm-tests`` or ``run-bmark-tests``. +For example: + +.. code-block:: shell + + make run-asm-tests + make run-bmark-tests + + +.. Note:: Before running the pre-packaged suites, you must run the plain ``make`` command, since the elaboration command generates a Makefile fragment that contains the target for the pre-packaged test suites. Otherwise, you will likely encounter a Makefile target error. + + +.. 
_sw-sim-custom: + +Simulating A Custom Project +------------------------------- + +If you later create your own project, you can use environment variables to build an alternate configuration. + +In order to construct the simulator with our custom design, we run the following command within the simulator directory: + +.. code-block:: shell + + make SBT_PROJECT=... MODEL=... VLOG_MODEL=... MODEL_PACKAGE=... CONFIG=... CONFIG_PACKAGE=... GENERATOR_PACKAGE=... TB=... TOP=... + +Each of these make variables correspond to a particular part of the design/codebase and are needed so that the make system can correctly build and make a RTL simulation. + +The ``SBT_PROJECT`` is the ``build.sbt`` project that holds all of the source files and that will be run during the RTL build. + +The ``MODEL`` and ``VLOG_MODEL`` are the top-level class names of the design. Normally, these are the same, but in some cases these can differ (if the Chisel class differs than what is emitted in the Verilog). + +The ``MODEL_PACKAGE`` is the Scala package (in the Scala code that says ``package ...``) that holds the ``MODEL`` class. + +The ``CONFIG`` is the name of the class used for the parameter Config while the ``CONFIG_PACKAGE`` is the Scala package it resides in. + +The ``GENERATOR_PACKAGE`` is the Scala package that holds the Generator class that elaborates the design. + +The ``TB`` is the name of the Verilog wrapper that connects the ``TestHarness`` to VCS/Verilator for simulation. + +Finally, the ``TOP`` variable is used to distinguish between the top-level of the design and the ``TestHarness`` in our system. +For example, in the normal case, the ``MODEL`` variable specifies the ``TestHarness`` as the top-level of the design. +However, the true top-level design, the SoC being simulated, is pointed to by the ``TOP`` variable. +This separation allows the infrastructure to separate files based on the harness or the SoC top level. 
+ +Common configurations of all these variables are packaged using a ``SUB_PROJECT`` make variable. +Therefore, in order to simulate a simple Rocket-based example system we can use: + + +.. code-block:: shell + + make SUB_PROJECT=yourproject + ./simulator-- ... + + +All `Make` targets that can be applied to the default example can also be applied to a custom project using the custom environment variables. For example, the following code example will run the RISC-V assembly benchmark suite on the Hwacha subproject: + +.. code-block:: shell + + make SUB_PROJECT=hwacha run-asm-tests + + +Finally, in the ``generated-src/<...>--/`` directory resides all of the collateral and Verilog source files for the build/simulation. +Specifically, the SoC top-level (``TOP``) Verilog file is denoted with ``*.top.v`` while the ``TestHarness`` file is denoted with ``*.harness.v``. + + +Generating Waveforms +----------------------- + +If you would like to extract waveforms from the simulation, run the command ``make debug`` instead of just ``make``. + + +For a Verilator simulation, this will generate a vcd file (vcd is a standard waveform representation file format) that can be loaded to any common waveform viewer. +An open-source vcd-capable waveform viewer is `GTKWave `__. + + +For a VCS simulation, this will generate a vpd file (this is a proprietary waveform representation format used by Synopsys) that can be loaded to vpd-supported waveform viewers. +If you have Synopsys licenses, we recommend using the DVE waveform viewer. + diff --git a/docs/Simulation/Software-RTL-Simulators.rst b/docs/Simulation/Software-RTL-Simulators.rst deleted file mode 100644 index 5dd4e527..00000000 --- a/docs/Simulation/Software-RTL-Simulators.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. _sw-rtl-sim-intro: - -Software RTL Simulators -=================================== - -Verilator (Open-Source) ------------------------ - -`Verilator `__ is an open-source LGPL-Licensed simulator maintained by `Veripool `__. 
-The Chipyard framework can download, build, and execute simulations using Verilator. - -To run a simulation using Verilator, perform the following steps: - -To compile the example design, run ``make`` in the ``sims/verilator`` directory. -This will elaborate the ``RocketConfig`` in the example project. - -An executable called ``simulator-example-RocketConfig`` will be produced. -This executable is a simulator that has been compiled based on the design that was built. -You can then use this executable to run any compatible RV64 code. -For instance, to run one of the riscv-tools assembly tests. - -.. code-block:: shell - - ./simulator-example-RocketConfig $RISCV/riscv64-unknown-elf/share/riscv-tests/isa/rv64ui-p-simple - -If you later create your own project, you can use environment variables to build an alternate configuration. - -.. code-block:: shell - - make SUB_PROJECT=yourproject - ./simulator-- ... - -If you would like to extract waveforms from the simulation, run the command ``make debug`` instead of just ``make``. -This will generate a vcd file (vcd is a standard waveform representation file format) that can be loaded to any common waveform viewer. -An open-source vcd-capable waveform viewer is `GTKWave `__. - -Please refer to :ref:`Running A Simulation` for a step by step tutorial on how to get a simulator up and running. -Commercial Software RTL Simulators - -Synopsys VCS (License Required) --------------------------------- - -`VCS `__ is a commercial RTL simulator developed by Synopsys. -It requires commercial licenses. -The Chipyard framework can compile and execute simulations using VCS. -VCS simulation will generally compile faster than Verilator simulations. - -To run a simulation using VCS, perform the following steps: - -Make sure that the VCS simulator is on your ``PATH``. - -To compile the example design, run make in the ``sims/vcs`` directory. -This will elaborate the ``RocketConfig`` in the example project. 
- -An executable called ``simulator-example-RocketConfig`` will be produced. -This executable is a simulator that has been compiled based on the design that was built. -You can then use this executable to run any compatible RV64 code. -For instance, to run one of the riscv-tools assembly tests. - -.. code-block:: shell - - ./simulator-example-RocketConfig $RISCV/riscv64-unknown-elf/share/riscv-tests/isa/rv64ui-p-simple - -If you later create your own project, you can use environment variables to build an alternate configuration. - -.. code-block:: shell - - make SUB_PROJECT=yourproject - ./simulator-- ... - -If you would like to extract waveforms from the simulation, run the command ``make debug`` instead of just ``make``. -This will generate a vpd file (this is a proprietary waveform representation format used by Synopsys) that can be loaded to vpd-supported waveform viewers. -If you have Synopsys licenses, we recommend using the DVE waveform viewer. - -Please refer to :ref:`Running A Simulation` for a step by step tutorial on how to get a simulator up and running. diff --git a/docs/Simulation/index.rst b/docs/Simulation/index.rst index 50dbb57e..fe0fa161 100644 --- a/docs/Simulation/index.rst +++ b/docs/Simulation/index.rst @@ -1,4 +1,4 @@ -Simulators +Simulation ======================= Chipyard supports two classes of simulation: @@ -12,9 +12,11 @@ at O(100 MHz), making them appropriate for booting an operating system and running a complete workload, but have multi-hour compile times and poorer debug visability. +Click next to see how to run a simulation. + .. 
toctree:: :maxdepth: 2 - :caption: Simulators: + :caption: Simulation: - Software-RTL-Simulators + Software-RTL-Simulation FPGA-Accelerated-Simulators diff --git a/docs/Software/FireMarshal.rst b/docs/Software/FireMarshal.rst new file mode 100644 index 00000000..ecc23736 --- /dev/null +++ b/docs/Software/FireMarshal.rst @@ -0,0 +1,23 @@ +FireMarshal +================= + +FireMarshal is a workload generation tool for RISC-V based systems. It +currently only supports the FireSim FPGA-accelerated simulation platform. + +**Workloads** in FireMarshal consist of a series of **Jobs** that are assigned +to logical nodes in the target system. If no jobs are specified, then the +workload is considered ``uniform`` and only a single image will be produced for +all nodes in the system. Workloads are described by a ``json`` file and a +corresponding workload directory and can inherit their definitions from +existing workloads. Typically, workload configurations are kept in +``workloads/`` although you can use any directory you like. We provide a few +basic workloads to start with including buildroot or Fedora-based Linux +distributions and bare-metal. + +Once you define a workload, the ``marshal`` command will produce a +corresponding boot-binary and rootfs for each job in the workload. This binary +and rootfs can then be launched on qemu or spike (for functional simulation), or +installed to a platform for running on real RTL (currently only FireSim is +automated). + +To get started, check out the full `FireMarshal documentation `_. diff --git a/docs/Software/Spike.rst b/docs/Software/Spike.rst new file mode 100644 index 00000000..015ec5b2 --- /dev/null +++ b/docs/Software/Spike.rst @@ -0,0 +1,4 @@ +The RISC-V ISA Simulator (Spike) +================================= + +.. attention:: This article is a stub. Fill it out! 
diff --git a/docs/Software/index.rst b/docs/Software/index.rst new file mode 100644 index 00000000..e7fe9925 --- /dev/null +++ b/docs/Software/index.rst @@ -0,0 +1,21 @@ +Target Software +================================== + +Chipyard includes tools for developing target software workloads. The primary +tool is FireMarshal, which manages workload descriptions and generates binaries +and disk images to run on your target designs. Workloads can be bare-metal, or +be based on standard Linux distributions. Users can customize every part of the +build process, including providing custom kernels (if needed by the hardware). + +FireMarshal can also run your workloads on high-performance functional +simulators like Spike and Qemu. Spike is easily customized and serves as the +official RISC-V ISA reference implementation. Qemu is a high-performance +functional simulator that can run nearly as fast as native code, but can be +challenging to modify. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + FireMarshal + Spike diff --git a/docs/Tools/FIRRTL.rst b/docs/Tools/FIRRTL.rst index 23f39f16..b850b39e 100644 --- a/docs/Tools/FIRRTL.rst +++ b/docs/Tools/FIRRTL.rst @@ -1,3 +1,4 @@ +.. _firrtl: FIRRTL ================================ diff --git a/docs/Tools/Treadle.rst b/docs/Tools/Treadle.rst index 19df75de..4c6d016c 100644 --- a/docs/Tools/Treadle.rst +++ b/docs/Tools/Treadle.rst @@ -1,5 +1,9 @@ -Treadle +Treadle and FIRRTL Interpreter ============================== -`Treadle `__ is a circuit simulator that directly executes FIRRTL. -It is especially useful for interactive debugging and small unit tests that benefit from a low-overhead simulator. +`Treadle `__ and +`FIRRTL Interpreter `__ +are circuit simulators that directly execute FIRRTL (specifically low-firrtl IR). +Treadle is the replacement for FIRRTL Interpreter but FIRRTL Interpreter is still used within some +projects. Treadle is useful for simulating modules in a larger SoC design. 
Many projects +use Treadle for interactive debugging and a low-overhead simulator. diff --git a/docs/Chipyard-Basics/Building-A-Chip.rst b/docs/VLSI/Building-A-Chip.rst similarity index 100% rename from docs/Chipyard-Basics/Building-A-Chip.rst rename to docs/VLSI/Building-A-Chip.rst diff --git a/docs/VLSI/index.rst b/docs/VLSI/index.rst index f8bdb7a8..44303769 100644 --- a/docs/VLSI/index.rst +++ b/docs/VLSI/index.rst @@ -8,4 +8,5 @@ In particular, we aim to support the HAMMER physical design generator flow. :maxdepth: 2 :caption: VLSI Flow: + Building-A-Chip HAMMER diff --git a/docs/_static/images/boom-pipeline-detailed.png b/docs/_static/images/boom-pipeline-detailed.png new file mode 100644 index 00000000..7a85738f Binary files /dev/null and b/docs/_static/images/boom-pipeline-detailed.png differ diff --git a/docs/_static/images/nic-design.png b/docs/_static/images/nic-design.png new file mode 100644 index 00000000..0ebaf0e0 Binary files /dev/null and b/docs/_static/images/nic-design.png differ diff --git a/docs/_static/images/sha3.png b/docs/_static/images/sha3.png new file mode 100644 index 00000000..def95294 Binary files /dev/null and b/docs/_static/images/sha3.png differ diff --git a/docs/index.rst b/docs/index.rst index 61acfae3..75c3714e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,6 +38,8 @@ Table of Contents Customization/index + Software/index + Advanced-Usage/index TileLink-Diplomacy-Reference/index diff --git a/generators/example/src/main/resources/vsrc/GCDMMIOBlackBox.v b/generators/example/src/main/resources/vsrc/GCDMMIOBlackBox.v new file mode 100644 index 00000000..9104ae87 --- /dev/null +++ b/generators/example/src/main/resources/vsrc/GCDMMIOBlackBox.v @@ -0,0 +1,48 @@ +// DOC include start: GCD portlist +module GCDMMIOBlackBox + #(parameter WIDTH) + ( + input clock, + input reset, + output input_ready, + input input_valid, + input [WIDTH-1:0] x, + input [WIDTH-1:0] y, + input output_ready, + output output_valid, + output reg 
[WIDTH-1:0] gcd + ); +// DOC include end: GCD portlist + + localparam S_IDLE = 2'b00, S_RUN = 2'b01, S_DONE = 2'b10; + + reg [1:0] state; + reg [WIDTH-1:0] tmp; + + assign input_ready = state == S_IDLE; + assign output_valid = state == S_DONE; + + always @(posedge clock) begin + if (reset) + state <= S_IDLE; + else if (state == S_IDLE && input_valid) + state <= S_RUN; + else if (state == S_RUN && tmp == 0) + state <= S_DONE; + else if (state == S_DONE && output_ready) + state <= S_IDLE; + end + + always @(posedge clock) begin + if (state == S_IDLE && input_valid) begin + gcd <= x; + tmp <= y; + end else if (state == S_RUN) begin + if (gcd > tmp) + gcd <= gcd - tmp; + else + tmp <= tmp - gcd; + end + end + +endmodule // GCDMMIOBlackBox diff --git a/generators/example/src/main/scala/ConfigMixins.scala b/generators/example/src/main/scala/ConfigMixins.scala index a829db22..7d7e74af 100644 --- a/generators/example/src/main/scala/ConfigMixins.scala +++ b/generators/example/src/main/scala/ConfigMixins.scala @@ -37,6 +37,7 @@ class WithBootROM extends Config((site, here, up) => { contentFileName = s"./bootrom/bootrom.rv${site(XLen)}.img") }) +// DOC include start: WithGPIO /** * Class to add in GPIO */ @@ -44,6 +45,7 @@ class WithGPIO extends Config((site, here, up) => { case PeripheryGPIOKey => List( GPIOParams(address = 0x10012000, width = 4, includeIOF = false)) }) +// DOC include end: WithGPIO // ----------------------------------------------- // BOOM and/or Rocket Top Level System Parameter Mixins @@ -85,6 +87,14 @@ class WithPWMAXI4Top extends Config((site, here, up) => { Module(LazyModule(new TopWithPWMAXI4()(p)).module) }) +/** + * Class to specify a top level BOOM and/or Rocket system with a TL-attached GCD device + */ +class WithGCDTop extends Config((site, here, up) => { + case BuildTop => (clock: Clock, reset: Bool, p: Parameters) => + Module(LazyModule(new TopWithGCD()(p)).module) +}) + /** * Class to specify a top level BOOM and/or Rocket system with a block 
device
  */
@@ -107,6 +117,7 @@ class WithSimBlockDeviceTop extends Config((site, here, up) => {
   }
 })
 
+// DOC include start: WithGPIOTop
 /**
  * Class to specify a top level BOOM and/or Rocket system with GPIO
  */
@@ -121,6 +132,7 @@ class WithGPIOTop extends Config((site, here, up) => {
     top
   }
 })
+// DOC include end: WithGPIOTop
 
 // ------------------
 // Multi-RoCC Support
diff --git a/generators/example/src/main/scala/GCDMMIOBlackBox.scala b/generators/example/src/main/scala/GCDMMIOBlackBox.scala
new file mode 100644
index 00000000..891fe1c9
--- /dev/null
+++ b/generators/example/src/main/scala/GCDMMIOBlackBox.scala
@@ -0,0 +1,98 @@
+package example
+
+import chisel3._
+import chisel3.util._
+import chisel3.core.{IntParam, Reset}
+import freechips.rocketchip.amba.axi4._
+import freechips.rocketchip.subsystem.BaseSubsystem
+import freechips.rocketchip.config.{Parameters, Field}
+import freechips.rocketchip.diplomacy._
+import freechips.rocketchip.regmapper.{HasRegMap, RegField}
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util.UIntIsOneOf
+
+// DOC include start: GCD blackbox
+class GCDMMIOBlackBox(w: Int) extends BlackBox(Map("WIDTH" -> IntParam(w))) with HasBlackBoxResource {
+  val io = IO(new Bundle {
+    val clock = Input(Clock())
+    val reset = Input(Bool())
+    val input_ready = Output(Bool())
+    val input_valid = Input(Bool())
+    val x = Input(UInt(w.W))
+    val y = Input(UInt(w.W))
+    val output_ready = Input(Bool())
+    val output_valid = Output(Bool())
+    val gcd = Output(UInt(w.W))
+  })
+
+  addResource("/vsrc/GCDMMIOBlackBox.v")
+}
+// DOC include end: GCD blackbox
+
+// DOC include start: GCD instance regmap
+case class GCDParams(address: BigInt, beatBytes: Int, width: Int)
+
+trait GCDModule extends HasRegMap {
+  implicit val p: Parameters
+  def params: GCDParams
+  val clock: Clock
+  val reset: Reset
+
+  val impl = Module(new GCDMMIOBlackBox(params.width))
+
+  // x is held in a register; y and gcd are decoupled channels to/from the blackbox
+  val x = Reg(UInt(params.width.W))
+  val y = Wire(new DecoupledIO(UInt(params.width.W)))
+  val gcd = Wire(new DecoupledIO(UInt(params.width.W)))
+  val status = Cat(impl.io.input_ready, impl.io.output_valid) // bit 1: input_ready, bit 0: output_valid
+
+  impl.io.clock := clock
+  impl.io.reset := reset.asBool
+  impl.io.x := x
+  impl.io.y := y.bits
+  impl.io.input_valid := y.valid
+  y.ready := impl.io.input_ready
+
+  gcd.bits := impl.io.gcd
+  gcd.valid := impl.io.output_valid
+  impl.io.output_ready := gcd.ready
+
+  regmap(
+    0x00 -> Seq(
+      RegField.r(2, status)), // a read-only register capturing current status
+    0x04 -> Seq(
+      RegField.w(params.width, x)), // a plain, write-only register
+    0x08 -> Seq(
+      RegField.w(params.width, y)), // write-only, y.valid is set on write
+    0x0C -> Seq(
+      RegField.r(params.width, gcd))) // read-only, gcd.ready is set on read
+}
+// DOC include end: GCD instance regmap
+
+// DOC include start: GCD cake
+class GCD(c: GCDParams)(implicit p: Parameters)
+  extends TLRegisterRouter(
+    c.address, "gcd", Seq("ucbbar,gcd"),
+    beatBytes = c.beatBytes)(
+      new TLRegBundle(c, _))(
+      new TLRegModule(c, _, _) with GCDModule)
+
+trait HasPeripheryGCD { this: BaseSubsystem =>
+  implicit val p: Parameters
+
+  private val address = 0x2000
+  private val portName = "gcd"
+  private val gcdWidth = 32
+
+  val gcd = LazyModule(new GCD(
+    GCDParams(address, pbus.beatBytes, gcdWidth))(p))
+
+  pbus.toVariableWidthSlave(Some(portName)) { gcd.node }
+}
+
+trait HasPeripheryGCDModuleImp extends LazyModuleImp {
+  implicit val p: Parameters
+  val outer: HasPeripheryGCD
+}
+
+// DOC include end: GCD cake
diff --git a/generators/example/src/main/scala/RocketConfigs.scala b/generators/example/src/main/scala/RocketConfigs.scala
index d5a090b4..615111df 100644
--- a/generators/example/src/main/scala/RocketConfigs.scala
+++ b/generators/example/src/main/scala/RocketConfigs.scala
@@ -59,13 +59,20 @@ class PWMRocketConfig extends Config(
   new freechips.rocketchip.system.BaseConfig)
 // DOC include end: PWMRocketConfig
 
-class
PWMRAXI4ocketConfig extends Config(
+class PWMAXI4RocketConfig extends Config(
   new WithPWMAXI4Top ++  // use top with axi4-controlled PWM
   new WithBootROM ++
   new freechips.rocketchip.subsystem.WithInclusiveCache ++
   new freechips.rocketchip.subsystem.WithNBigCores(1) ++
   new freechips.rocketchip.system.BaseConfig)
 
+class GCDRocketConfig extends Config(
+  new WithGCDTop ++  // use top with the TL-attached MMIO GCD module
+  new WithBootROM ++
+  new freechips.rocketchip.subsystem.WithInclusiveCache ++
+  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
+  new freechips.rocketchip.system.BaseConfig)
+
 class SimBlockDeviceRocketConfig extends Config(
   new testchipip.WithBlockDevice ++  // add block-device module to peripherybus
   new WithSimBlockDeviceTop ++  // use top with block-device IOs and connect to simblockdevice
@@ -82,6 +89,7 @@ class BlockDeviceModelRocketConfig extends Config(
   new freechips.rocketchip.subsystem.WithNBigCores(1) ++
   new freechips.rocketchip.system.BaseConfig)
 
+// DOC include start: GPIORocketConfig
 class GPIORocketConfig extends Config(
   new WithGPIO ++  // add GPIOs to the peripherybus
   new WithGPIOTop ++  // use top with GPIOs
@@ -89,6 +97,7 @@ class GPIORocketConfig extends Config(
   new freechips.rocketchip.subsystem.WithInclusiveCache ++
   new freechips.rocketchip.subsystem.WithNBigCores(1) ++
   new freechips.rocketchip.system.BaseConfig)
+// DOC include end: GPIORocketConfig
 
 class DualCoreRocketConfig extends Config(
   new WithTop ++
@@ -113,6 +122,7 @@ class GB1MemoryRocketConfig extends Config(
   new freechips.rocketchip.subsystem.WithNBigCores(1) ++
   new freechips.rocketchip.system.BaseConfig)
 
+// DOC include start: Sha3Rocket
 class Sha3RocketConfig extends Config(
   new WithTop ++
   new WithBootROM ++
@@ -120,6 +130,7 @@ class Sha3RocketConfig extends Config(
   new sha3.WithSha3Accel ++  // add SHA3 rocc accelerator
   new freechips.rocketchip.subsystem.WithNBigCores(1) ++
   new freechips.rocketchip.system.BaseConfig)
+// DOC include end: Sha3Rocket
 
 // DOC include start:
InitZeroRocketConfig
 class InitZeroRocketConfig extends Config(
diff --git a/generators/example/src/main/scala/Top.scala b/generators/example/src/main/scala/Top.scala
index 94bed0de..b7f1a500 100644
--- a/generators/example/src/main/scala/Top.scala
+++ b/generators/example/src/main/scala/Top.scala
@@ -53,6 +53,16 @@ class TopWithPWMAXI4Module(l: TopWithPWMAXI4) extends TopModule(l)
 
 //---------------------------------------------------------------------------------------------------------
 
+class TopWithGCD(implicit p: Parameters) extends Top
+  with HasPeripheryGCD {
+  override lazy val module = new TopWithGCDModule(this)
+}
+
+class TopWithGCDModule(l: TopWithGCD) extends TopModule(l)
+  with HasPeripheryGCDModuleImp
+
+//---------------------------------------------------------------------------------------------------------
+
 class TopWithBlockDevice(implicit p: Parameters) extends Top
   with HasPeripheryBlockDevice {
   override lazy val module = new TopWithBlockDeviceModule(this)
diff --git a/tests/gcd.c b/tests/gcd.c
new file mode 100644
index 00000000..a89abf65
--- /dev/null
+++ b/tests/gcd.c
@@ -0,0 +1,48 @@
+#include "mmio.h"
+#include <stdio.h>
+
+// MMIO register addresses of the GCD peripheral
+#define GCD_STATUS 0x2000
+#define GCD_X 0x2004
+#define GCD_Y 0x2008
+#define GCD_GCD 0x200C
+
+// Software reference model: subtraction-based GCD used to check the hardware result
+unsigned int gcd_ref(unsigned int x, unsigned int y) {
+  // gcd(0, y) == y; the subtraction loop alone would spin forever when x == 0
+  if (x == 0)
+    return y;
+  while (y != 0) {
+    if (x > y)
+      x = x - y;
+    else
+      y = y - x;
+  }
+  return x;
+}
+
+// DOC include start: GCD test
+int main(void)
+{
+  uint32_t result, ref, x = 20, y = 15;
+
+  // wait for peripheral to be ready (STATUS bit 1)
+  while ((reg_read8(GCD_STATUS) & 0x2) == 0) ;
+
+  reg_write32(GCD_X, x);
+  reg_write32(GCD_Y, y);
+
+
+  // wait for peripheral to complete (STATUS bit 0)
+  while ((reg_read8(GCD_STATUS) & 0x1) == 0) ;
+
+  result = reg_read32(GCD_GCD);
+  ref = gcd_ref(x, y);
+
+  if (result != ref) {
+    printf("Hardware result %u does not match reference value %u\n", (unsigned int)result, (unsigned int)ref);
+    return 1;
+  }
+  return 0;
+}
+// DOC include end: GCD test